DPDK patches and discussions
* [RFC] event/dsw: support explicit release only mode
@ 2023-11-09 18:33 Mattias Rönnblom
  2024-05-24 19:24 ` [PATCH] " Mattias Rönnblom
  0 siblings, 1 reply; 10+ messages in thread
From: Mattias Rönnblom @ 2023-11-09 18:33 UTC (permalink / raw)
  To: dev
  Cc: hofors, bruce.richardson, Jerin Jacob, Peter Nilsson J,
	Svante Järvstråt, Heng Wang, Mattias Rönnblom

Add the RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE capability to the
DSW event device.

This feature may be used by an EAL thread to pull more work from the
work scheduler, without giving up the option to forward events
originating from a previous dequeue batch. This in turn allows an EAL
thread to be productive while waiting for a hardware accelerator to
complete some operation.
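
(For illustration only; not part of the patch: a minimal sketch of how an
application might opt in to explicit release mode. dev_id and port_id are
assumed to be valid, and error handling is omitted.)

#include <errno.h>
#include <rte_eventdev.h>

static int
setup_explicit_release_port(uint8_t dev_id, uint8_t port_id)
{
	struct rte_event_dev_info info;
	struct rte_event_port_conf conf;

	rte_event_dev_info_get(dev_id, &info);

	if (!(info.event_dev_cap &
	      RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE))
		return -ENOTSUP;

	rte_event_port_default_conf_get(dev_id, port_id, &conf);

	/* Leave releasing of dequeued events to the application,
	 * instead of having it happen implicitly on the next
	 * rte_event_dequeue_burst() call.
	 */
	conf.event_port_cfg |= RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL;

	return rte_event_port_setup(dev_id, port_id, &conf);
}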

Prior to this change, DSW didn't make any distinction between
RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_NEW type events, other than that
new events would be backpressured earlier.

After this change, DSW tracks the number of released events (i.e.,
events of type RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_RELEASE) that have
been enqueued.

To reduce overhead, DSW does not track the *identity* of individual
events. This in turn implies that, at a certain stage in the flow
migration process, DSW must wait for all pending releases (on the
migration source port, only) to be received from the application, to
ensure that no events pertaining to any of the to-be-migrated flows are
being processed.

With this change, DSW starts making a distinction between forward and
new type events for credit allocation purposes. Only new events need
credits. All events marked as RTE_EVENT_OP_FORWARD must have a
corresponding dequeued event from a previous dequeue batch.
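
(Again for illustration only: a sketch of what this contract looks like
from the application side. process_event() is a hypothetical application
function; queue_id/flow updates for forwarded events and error handling
are omitted.)

#include <stdbool.h>
#include <rte_eventdev.h>

/* Hypothetical handler: returns true if the event should be forwarded
 * to the next pipeline stage, false if the application is done with it.
 */
extern bool process_event(struct rte_event *ev);

static void
worker_loop(uint8_t dev_id, uint8_t port_id)
{
	struct rte_event evs[16];

	for (;;) {
		uint16_t n;
		uint16_t i;

		n = rte_event_dequeue_burst(dev_id, port_id, evs,
					    RTE_DIM(evs), 0);

		for (i = 0; i < n; i++)
			/* Forwarded and released events consume no
			 * credits; only RTE_EVENT_OP_NEW events do.
			 */
			evs[i].op = process_event(&evs[i]) ?
				RTE_EVENT_OP_FORWARD : RTE_EVENT_OP_RELEASE;

		/* With implicit release disabled, this enqueue could
		 * also be deferred across further dequeue calls (e.g.,
		 * while waiting for an accelerator).
		 */
		rte_event_enqueue_burst(dev_id, port_id, evs, n);
	}
}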

Flow migration for flows on RTE_SCHED_TYPE_PARALLEL queues remains
unaffected by this change.

A side-effect of the tweaked DSW migration logic is that the migration
latency is reduced, regardless of whether implicit release is enabled.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 drivers/event/dsw/dsw_evdev.c |  8 +++-
 drivers/event/dsw/dsw_evdev.h |  3 ++
 drivers/event/dsw/dsw_event.c | 84 ++++++++++++++++++++++-------------
 3 files changed, 62 insertions(+), 33 deletions(-)

diff --git a/drivers/event/dsw/dsw_evdev.c b/drivers/event/dsw/dsw_evdev.c
index 1209e73a9d..445f3ac357 100644
--- a/drivers/event/dsw/dsw_evdev.c
+++ b/drivers/event/dsw/dsw_evdev.c
@@ -23,15 +23,20 @@ dsw_port_setup(struct rte_eventdev *dev, uint8_t port_id,
 	struct rte_event_ring *in_ring;
 	struct rte_ring *ctl_in_ring;
 	char ring_name[RTE_RING_NAMESIZE];
+	bool implicit_release;
 
 	port = &dsw->ports[port_id];
 
+	implicit_release =
+	    !(conf->event_port_cfg & RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
+
 	*port = (struct dsw_port) {
 		.id = port_id,
 		.dsw = dsw,
 		.dequeue_depth = conf->dequeue_depth,
 		.enqueue_depth = conf->enqueue_depth,
-		.new_event_threshold = conf->new_event_threshold
+		.new_event_threshold = conf->new_event_threshold,
+		.implicit_release = implicit_release
 	};
 
 	snprintf(ring_name, sizeof(ring_name), "dsw%d_p%u", dev->data->dev_id,
@@ -221,6 +226,7 @@ dsw_info_get(struct rte_eventdev *dev __rte_unused,
 		.max_profiles_per_port = 1,
 		.event_dev_cap = RTE_EVENT_DEV_CAP_BURST_MODE|
 		RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED|
+		RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE|
 		RTE_EVENT_DEV_CAP_NONSEQ_MODE|
 		RTE_EVENT_DEV_CAP_MULTIPLE_QUEUE_PORT|
 		RTE_EVENT_DEV_CAP_CARRY_FLOW_ID
diff --git a/drivers/event/dsw/dsw_evdev.h b/drivers/event/dsw/dsw_evdev.h
index 6416a8a898..a245a8940e 100644
--- a/drivers/event/dsw/dsw_evdev.h
+++ b/drivers/event/dsw/dsw_evdev.h
@@ -128,6 +128,7 @@ struct dsw_queue_flow {
 enum dsw_migration_state {
 	DSW_MIGRATION_STATE_IDLE,
 	DSW_MIGRATION_STATE_PAUSING,
+	DSW_MIGRATION_STATE_FINISH_PENDING,
 	DSW_MIGRATION_STATE_UNPAUSING
 };
 
@@ -148,6 +149,8 @@ struct dsw_port {
 
 	int32_t new_event_threshold;
 
+	bool implicit_release;
+
 	uint16_t pending_releases;
 
 	uint16_t next_parallel_flow_id;
diff --git a/drivers/event/dsw/dsw_event.c b/drivers/event/dsw/dsw_event.c
index 93bbeead2e..c70e50dd16 100644
--- a/drivers/event/dsw/dsw_event.c
+++ b/drivers/event/dsw/dsw_event.c
@@ -1141,6 +1141,15 @@ dsw_port_move_emigrating_flows(struct dsw_evdev *dsw,
 	source_port->migration_state = DSW_MIGRATION_STATE_UNPAUSING;
 }
 
+static void
+dsw_port_try_finish_pending(struct dsw_evdev *dsw, struct dsw_port *source_port)
+{
+	if (unlikely(source_port->migration_state ==
+		     DSW_MIGRATION_STATE_FINISH_PENDING &&
+		     source_port->pending_releases == 0))
+		dsw_port_move_emigrating_flows(dsw, source_port);
+}
+
 static void
 dsw_port_handle_confirm(struct dsw_evdev *dsw, struct dsw_port *port)
 {
@@ -1149,14 +1158,15 @@ dsw_port_handle_confirm(struct dsw_evdev *dsw, struct dsw_port *port)
 	if (port->cfm_cnt == (dsw->num_ports-1)) {
 		switch (port->migration_state) {
 		case DSW_MIGRATION_STATE_PAUSING:
-			dsw_port_move_emigrating_flows(dsw, port);
+			port->migration_state =
+				DSW_MIGRATION_STATE_FINISH_PENDING;
 			break;
 		case DSW_MIGRATION_STATE_UNPAUSING:
 			dsw_port_end_emigration(dsw, port,
 						RTE_SCHED_TYPE_ATOMIC);
 			break;
 		default:
-			RTE_ASSERT(0);
+			RTE_VERIFY(0);
 			break;
 		}
 	}
@@ -1195,19 +1205,18 @@ dsw_port_note_op(struct dsw_port *port, uint16_t num_events)
 static void
 dsw_port_bg_process(struct dsw_evdev *dsw, struct dsw_port *port)
 {
-	/* For simplicity (in the migration logic), avoid all
-	 * background processing in case event processing is in
-	 * progress.
-	 */
-	if (port->pending_releases > 0)
-		return;
-
 	/* Polling the control ring is relatively inexpensive, and
 	 * polling it often helps bringing down migration latency, so
 	 * do this for every iteration.
 	 */
 	dsw_port_ctl_process(dsw, port);
 
+	/* Always check if a migration is waiting for pending releases
+	 * to arrive, to minimize the amount of time during which
+	 * dequeuing new events from the port is disabled.
+	 */
+	dsw_port_try_finish_pending(dsw, port);
+
 	/* To avoid considering migration and flushing output buffers
 	 * on every dequeue/enqueue call, the scheduler only performs
 	 * such 'background' tasks every nth
@@ -1252,8 +1261,8 @@ static __rte_always_inline uint16_t
 dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 				const struct rte_event events[],
 				uint16_t events_len, bool op_types_known,
-				uint16_t num_new, uint16_t num_release,
-				uint16_t num_non_release)
+				uint16_t num_new, uint16_t num_forward,
+				uint16_t num_release)
 {
 	struct dsw_evdev *dsw = source_port->dsw;
 	bool enough_credits;
@@ -1287,14 +1296,14 @@ dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 	if (!op_types_known)
 		for (i = 0; i < events_len; i++) {
 			switch (events[i].op) {
-			case RTE_EVENT_OP_RELEASE:
-				num_release++;
-				break;
 			case RTE_EVENT_OP_NEW:
 				num_new++;
-				/* Falls through. */
-			default:
-				num_non_release++;
+				break;
+			case RTE_EVENT_OP_FORWARD:
+				num_forward++;
+				break;
+			case RTE_EVENT_OP_RELEASE:
+				num_release++;
 				break;
 			}
 		}
@@ -1309,15 +1318,15 @@ dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 		     source_port->new_event_threshold))
 		return 0;
 
-	enough_credits = dsw_port_acquire_credits(dsw, source_port,
-						  num_non_release);
+	enough_credits = dsw_port_acquire_credits(dsw, source_port, num_new);
 	if (unlikely(!enough_credits))
 		return 0;
 
-	source_port->pending_releases -= num_release;
+	dsw_port_return_credits(dsw, source_port, num_release);
+
+	source_port->pending_releases -= (num_forward + num_release);
 
-	dsw_port_enqueue_stats(source_port, num_new,
-			       num_non_release-num_new, num_release);
+	dsw_port_enqueue_stats(source_port, num_new, num_forward, num_release);
 
 	for (i = 0; i < events_len; i++) {
 		const struct rte_event *event = &events[i];
@@ -1329,9 +1338,9 @@ dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 	}
 
 	DSW_LOG_DP_PORT(DEBUG, source_port->id, "%d non-release events "
-			"accepted.\n", num_non_release);
+			"accepted.\n", num_new + num_forward);
 
-	return (num_non_release + num_release);
+	return (num_new + num_forward + num_release);
 }
 
 uint16_t
@@ -1358,7 +1367,7 @@ dsw_event_enqueue_new_burst(void *port, const struct rte_event events[],
 
 	return dsw_event_enqueue_burst_generic(source_port, events,
 					       events_len, true, events_len,
-					       0, events_len);
+					       0, 0);
 }
 
 uint16_t
@@ -1371,8 +1380,8 @@ dsw_event_enqueue_forward_burst(void *port, const struct rte_event events[],
 		events_len = source_port->enqueue_depth;
 
 	return dsw_event_enqueue_burst_generic(source_port, events,
-					       events_len, true, 0, 0,
-					       events_len);
+					       events_len, true, 0,
+					       events_len, 0);
 }
 
 uint16_t
@@ -1484,21 +1493,34 @@ dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num,
 	struct dsw_evdev *dsw = source_port->dsw;
 	uint16_t dequeued;
 
-	source_port->pending_releases = 0;
+	if (source_port->implicit_release) {
+		dsw_port_return_credits(dsw, port,
+					source_port->pending_releases);
+
+		source_port->pending_releases = 0;
+	}
 
 	dsw_port_bg_process(dsw, source_port);
 
 	if (unlikely(num > source_port->dequeue_depth))
 		num = source_port->dequeue_depth;
 
-	dequeued = dsw_port_dequeue_burst(source_port, events, num);
+	if (unlikely(source_port->migration_state ==
+		     DSW_MIGRATION_STATE_FINISH_PENDING))
+		/* Do not take on new work - only finish outstanding
+		 * (unreleased) events, to allow the migration
+		 * procedure to complete.
+		 */
+		dequeued = 0;
+	else
+		dequeued = dsw_port_dequeue_burst(source_port, events, num);
 
 	if (unlikely(source_port->migration_state ==
 		     DSW_MIGRATION_STATE_PAUSING))
 		dsw_port_stash_migrating_events(source_port, events,
 						&dequeued);
 
-	source_port->pending_releases = dequeued;
+	source_port->pending_releases += dequeued;
 
 	dsw_port_load_record(source_port, dequeued);
 
@@ -1508,8 +1530,6 @@ dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num,
 		DSW_LOG_DP_PORT(DEBUG, source_port->id, "Dequeued %d events.\n",
 				dequeued);
 
-		dsw_port_return_credits(dsw, source_port, dequeued);
-
 		/* One potential optimization one might think of is to
 		 * add a migration state (prior to 'pausing'), and
 		 * only record seen events when the port is in this
-- 
2.34.1



* [PATCH] event/dsw: support explicit release only mode
  2023-11-09 18:33 [RFC] event/dsw: support explicit release only mode Mattias Rönnblom
@ 2024-05-24 19:24 ` Mattias Rönnblom
  2024-05-27 15:35   ` Jerin Jacob
  2024-06-05 13:38   ` [PATCH v2] " Mattias Rönnblom
  0 siblings, 2 replies; 10+ messages in thread
From: Mattias Rönnblom @ 2024-05-24 19:24 UTC (permalink / raw)
  To: Jerin Jacob
  Cc: dev, hofors, bruce.richardson, Peter Nilsson J,
	Svante Järvstråt, Heng Wang, Mattias Rönnblom

Add the RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE capability to the
DSW event device.

This feature may be used by an EAL thread to pull more work from the
work scheduler, without giving up the option to forward events
originating from a previous dequeue batch. This in turn allows an EAL
thread to be productive while waiting for a hardware accelerator to
complete some operation.

Prior to this change, DSW didn't make any distinction between
RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_NEW type events, other than that
new events would be backpressured earlier.

After this change, DSW tracks the number of released events (i.e.,
events of type RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_RELEASE) that have
been enqueued.

For efficiency reasons, DSW does not track the *identity* of individual
events. This in turn implies that, at a certain stage in the flow
migration process, DSW must wait for all pending releases (on the
migration source port, only) to be received from the application, to
ensure that no events pertaining to any of the to-be-migrated flows are
being processed.

With this change, DSW starts making a distinction between forward and
new type events for credit allocation purposes. Only RTE_EVENT_OP_NEW
events need credits. All events marked as RTE_EVENT_OP_FORWARD must
have a corresponding dequeued event from a previous dequeue batch.

Flow migration for flows on RTE_SCHED_TYPE_PARALLEL queues remains
unaffected by this change.

A side-effect of the tweaked DSW migration logic is that the migration
latency is reduced, regardless of whether implicit release is enabled.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 drivers/event/dsw/dsw_evdev.c |  8 +++-
 drivers/event/dsw/dsw_evdev.h |  3 ++
 drivers/event/dsw/dsw_event.c | 84 ++++++++++++++++++++++-------------
 3 files changed, 62 insertions(+), 33 deletions(-)

diff --git a/drivers/event/dsw/dsw_evdev.c b/drivers/event/dsw/dsw_evdev.c
index ab0420b549..0dea1091e3 100644
--- a/drivers/event/dsw/dsw_evdev.c
+++ b/drivers/event/dsw/dsw_evdev.c
@@ -23,15 +23,20 @@ dsw_port_setup(struct rte_eventdev *dev, uint8_t port_id,
 	struct rte_event_ring *in_ring;
 	struct rte_ring *ctl_in_ring;
 	char ring_name[RTE_RING_NAMESIZE];
+	bool implicit_release;
 
 	port = &dsw->ports[port_id];
 
+	implicit_release =
+	    !(conf->event_port_cfg & RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
+
 	*port = (struct dsw_port) {
 		.id = port_id,
 		.dsw = dsw,
 		.dequeue_depth = conf->dequeue_depth,
 		.enqueue_depth = conf->enqueue_depth,
-		.new_event_threshold = conf->new_event_threshold
+		.new_event_threshold = conf->new_event_threshold,
+		.implicit_release = implicit_release
 	};
 
 	snprintf(ring_name, sizeof(ring_name), "dsw%d_p%u", dev->data->dev_id,
@@ -222,6 +227,7 @@ dsw_info_get(struct rte_eventdev *dev __rte_unused,
 		RTE_EVENT_DEV_CAP_ATOMIC |
 		RTE_EVENT_DEV_CAP_PARALLEL |
 		RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED|
+		RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE|
 		RTE_EVENT_DEV_CAP_NONSEQ_MODE|
 		RTE_EVENT_DEV_CAP_MULTIPLE_QUEUE_PORT|
 		RTE_EVENT_DEV_CAP_CARRY_FLOW_ID
diff --git a/drivers/event/dsw/dsw_evdev.h b/drivers/event/dsw/dsw_evdev.h
index 2018306265..d0d59478eb 100644
--- a/drivers/event/dsw/dsw_evdev.h
+++ b/drivers/event/dsw/dsw_evdev.h
@@ -128,6 +128,7 @@ struct dsw_queue_flow {
 enum dsw_migration_state {
 	DSW_MIGRATION_STATE_IDLE,
 	DSW_MIGRATION_STATE_PAUSING,
+	DSW_MIGRATION_STATE_FINISH_PENDING,
 	DSW_MIGRATION_STATE_UNPAUSING
 };
 
@@ -148,6 +149,8 @@ struct __rte_cache_aligned dsw_port {
 
 	int32_t new_event_threshold;
 
+	bool implicit_release;
+
 	uint16_t pending_releases;
 
 	uint16_t next_parallel_flow_id;
diff --git a/drivers/event/dsw/dsw_event.c b/drivers/event/dsw/dsw_event.c
index ca2b8e1032..f23079fd73 100644
--- a/drivers/event/dsw/dsw_event.c
+++ b/drivers/event/dsw/dsw_event.c
@@ -1149,6 +1149,15 @@ dsw_port_move_emigrating_flows(struct dsw_evdev *dsw,
 	source_port->migration_state = DSW_MIGRATION_STATE_UNPAUSING;
 }
 
+static void
+dsw_port_try_finish_pending(struct dsw_evdev *dsw, struct dsw_port *source_port)
+{
+	if (unlikely(source_port->migration_state ==
+		     DSW_MIGRATION_STATE_FINISH_PENDING &&
+		     source_port->pending_releases == 0))
+		dsw_port_move_emigrating_flows(dsw, source_port);
+}
+
 static void
 dsw_port_handle_confirm(struct dsw_evdev *dsw, struct dsw_port *port)
 {
@@ -1157,14 +1166,15 @@ dsw_port_handle_confirm(struct dsw_evdev *dsw, struct dsw_port *port)
 	if (port->cfm_cnt == (dsw->num_ports-1)) {
 		switch (port->migration_state) {
 		case DSW_MIGRATION_STATE_PAUSING:
-			dsw_port_move_emigrating_flows(dsw, port);
+			port->migration_state =
+				DSW_MIGRATION_STATE_FINISH_PENDING;
 			break;
 		case DSW_MIGRATION_STATE_UNPAUSING:
 			dsw_port_end_emigration(dsw, port,
 						RTE_SCHED_TYPE_ATOMIC);
 			break;
 		default:
-			RTE_ASSERT(0);
+			RTE_VERIFY(0);
 			break;
 		}
 	}
@@ -1203,19 +1213,18 @@ dsw_port_note_op(struct dsw_port *port, uint16_t num_events)
 static void
 dsw_port_bg_process(struct dsw_evdev *dsw, struct dsw_port *port)
 {
-	/* For simplicity (in the migration logic), avoid all
-	 * background processing in case event processing is in
-	 * progress.
-	 */
-	if (port->pending_releases > 0)
-		return;
-
 	/* Polling the control ring is relatively inexpensive, and
 	 * polling it often helps bringing down migration latency, so
 	 * do this for every iteration.
 	 */
 	dsw_port_ctl_process(dsw, port);
 
+	/* Always check if a migration is waiting for pending releases
+	 * to arrive, to minimize the amount of time dequeuing events
+	 * from the port is disabled.
+	 */
+	dsw_port_try_finish_pending(dsw, port);
+
 	/* To avoid considering migration and flushing output buffers
 	 * on every dequeue/enqueue call, the scheduler only performs
 	 * such 'background' tasks every nth
@@ -1260,8 +1269,8 @@ static __rte_always_inline uint16_t
 dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 				const struct rte_event events[],
 				uint16_t events_len, bool op_types_known,
-				uint16_t num_new, uint16_t num_release,
-				uint16_t num_non_release)
+				uint16_t num_new, uint16_t num_forward,
+				uint16_t num_release)
 {
 	struct dsw_evdev *dsw = source_port->dsw;
 	bool enough_credits;
@@ -1295,14 +1304,14 @@ dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 	if (!op_types_known)
 		for (i = 0; i < events_len; i++) {
 			switch (events[i].op) {
-			case RTE_EVENT_OP_RELEASE:
-				num_release++;
-				break;
 			case RTE_EVENT_OP_NEW:
 				num_new++;
-				/* Falls through. */
-			default:
-				num_non_release++;
+				break;
+			case RTE_EVENT_OP_FORWARD:
+				num_forward++;
+				break;
+			case RTE_EVENT_OP_RELEASE:
+				num_release++;
 				break;
 			}
 		}
@@ -1318,15 +1327,15 @@ dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 		     source_port->new_event_threshold))
 		return 0;
 
-	enough_credits = dsw_port_acquire_credits(dsw, source_port,
-						  num_non_release);
+	enough_credits = dsw_port_acquire_credits(dsw, source_port, num_new);
 	if (unlikely(!enough_credits))
 		return 0;
 
-	source_port->pending_releases -= num_release;
+	dsw_port_return_credits(dsw, source_port, num_release);
+
+	source_port->pending_releases -= (num_forward + num_release);
 
-	dsw_port_enqueue_stats(source_port, num_new,
-			       num_non_release-num_new, num_release);
+	dsw_port_enqueue_stats(source_port, num_new, num_forward, num_release);
 
 	for (i = 0; i < events_len; i++) {
 		const struct rte_event *event = &events[i];
@@ -1338,9 +1347,9 @@ dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 	}
 
 	DSW_LOG_DP_PORT(DEBUG, source_port->id, "%d non-release events "
-			"accepted.\n", num_non_release);
+			"accepted.\n", num_new + num_forward);
 
-	return (num_non_release + num_release);
+	return (num_new + num_forward + num_release);
 }
 
 uint16_t
@@ -1367,7 +1376,7 @@ dsw_event_enqueue_new_burst(void *port, const struct rte_event events[],
 
 	return dsw_event_enqueue_burst_generic(source_port, events,
 					       events_len, true, events_len,
-					       0, events_len);
+					       0, 0);
 }
 
 uint16_t
@@ -1380,8 +1389,8 @@ dsw_event_enqueue_forward_burst(void *port, const struct rte_event events[],
 		events_len = source_port->enqueue_depth;
 
 	return dsw_event_enqueue_burst_generic(source_port, events,
-					       events_len, true, 0, 0,
-					       events_len);
+					       events_len, true, 0,
+					       events_len, 0);
 }
 
 uint16_t
@@ -1493,21 +1502,34 @@ dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num,
 	struct dsw_evdev *dsw = source_port->dsw;
 	uint16_t dequeued;
 
-	source_port->pending_releases = 0;
+	if (source_port->implicit_release) {
+		dsw_port_return_credits(dsw, port,
+					source_port->pending_releases);
+
+		source_port->pending_releases = 0;
+	}
 
 	dsw_port_bg_process(dsw, source_port);
 
 	if (unlikely(num > source_port->dequeue_depth))
 		num = source_port->dequeue_depth;
 
-	dequeued = dsw_port_dequeue_burst(source_port, events, num);
+	if (unlikely(source_port->migration_state ==
+		     DSW_MIGRATION_STATE_FINISH_PENDING))
+		/* Do not take on new work - only finish outstanding
+		 * (unreleased) events, to allow the migration
+		 * procedure to complete.
+		 */
+		dequeued = 0;
+	else
+		dequeued = dsw_port_dequeue_burst(source_port, events, num);
 
 	if (unlikely(source_port->migration_state ==
 		     DSW_MIGRATION_STATE_PAUSING))
 		dsw_port_stash_migrating_events(source_port, events,
 						&dequeued);
 
-	source_port->pending_releases = dequeued;
+	source_port->pending_releases += dequeued;
 
 	dsw_port_load_record(source_port, dequeued);
 
@@ -1517,8 +1539,6 @@ dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num,
 		DSW_LOG_DP_PORT(DEBUG, source_port->id, "Dequeued %d events.\n",
 				dequeued);
 
-		dsw_port_return_credits(dsw, source_port, dequeued);
-
 		/* One potential optimization one might think of is to
 		 * add a migration state (prior to 'pausing'), and
 		 * only record seen events when the port is in this
-- 
2.34.1



* Re: [PATCH] event/dsw: support explicit release only mode
  2024-05-24 19:24 ` [PATCH] " Mattias Rönnblom
@ 2024-05-27 15:35   ` Jerin Jacob
  2024-05-27 16:08     ` Mattias Rönnblom
  2024-06-05 13:38   ` [PATCH v2] " Mattias Rönnblom
  1 sibling, 1 reply; 10+ messages in thread
From: Jerin Jacob @ 2024-05-27 15:35 UTC (permalink / raw)
  To: Mattias Rönnblom
  Cc: Jerin Jacob, dev, hofors, bruce.richardson, Peter Nilsson J,
	Svante Järvstråt, Heng Wang

On Sat, May 25, 2024 at 1:13 AM Mattias Rönnblom
<mattias.ronnblom@ericsson.com> wrote:
>
> Add the RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE capability to the
> DSW event device.
>
> This feature may be used by an EAL thread to pull more work from the
> work scheduler, without giving up the option to forward events
> originating from a previous dequeue batch. This in turn allows an EAL
> thread to be productive while waiting for a hardware accelerator to
> complete some operation.
>
> Prior to this change, DSW didn't make any distinction between
> RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_NEW type events, other than that
> new events would be backpressured earlier.
>
> After this change, DSW tracks the number of released events (i.e.,
> events of type RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_RELEASE) that have
> been enqueued.
>
> For efficiency reasons, DSW does not track the *identity* of individual
> events. This in turn implies that, at a certain stage in the flow
> migration process, DSW must wait for all pending releases (on the
> migration source port, only) to be received from the application, to
> ensure that no events pertaining to any of the to-be-migrated flows are
> being processed.
>
> With this change, DSW starts making a distinction between forward and
> new type events for credit allocation purposes. Only RTE_EVENT_OP_NEW
> events need credits. All events marked as RTE_EVENT_OP_FORWARD must
> have a corresponding dequeued event from a previous dequeue batch.
>
> Flow migration for flows on RTE_SCHED_TYPE_PARALLEL queues remains
> unaffected by this change.
>
> A side-effect of the tweaked DSW migration logic is that the migration
> latency is reduced, regardless of whether implicit release is enabled.
>
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>


1) Update the release notes' PMD-specific section for this new feature
2) Fix CI issue as applicable

https://patches.dpdk.org/project/dpdk/patch/20240524192437.183960-1-mattias.ronnblom@ericsson.com/
http://mails.dpdk.org/archives/test-report/2024-May/672848.html
https://github.com/ovsrobot/dpdk/actions/runs/9229147658


* Re: [PATCH] event/dsw: support explicit release only mode
  2024-05-27 15:35   ` Jerin Jacob
@ 2024-05-27 16:08     ` Mattias Rönnblom
  2024-05-27 17:17       ` Jerin Jacob
  0 siblings, 1 reply; 10+ messages in thread
From: Mattias Rönnblom @ 2024-05-27 16:08 UTC (permalink / raw)
  To: Jerin Jacob, Mattias Rönnblom
  Cc: Jerin Jacob, dev, bruce.richardson, Peter Nilsson J,
	Svante Järvstråt, Heng Wang

On 2024-05-27 17:35, Jerin Jacob wrote:
> On Sat, May 25, 2024 at 1:13 AM Mattias Rönnblom
> <mattias.ronnblom@ericsson.com> wrote:
>>
>> Add the RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE capability to the
>> DSW event device.
>>
>> This feature may be used by an EAL thread to pull more work from the
>> work scheduler, without giving up the option to forward events
>> originating from a previous dequeue batch. This in turn allows an EAL
>> thread to be productive while waiting for a hardware accelerator to
>> complete some operation.
>>
>> Prior to this change, DSW didn't make any distinction between
>> RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_NEW type events, other than that
>> new events would be backpressured earlier.
>>
>> After this change, DSW tracks the number of released events (i.e.,
>> events of type RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_RELEASE) that have
>> been enqueued.
>>
>> For efficiency reasons, DSW does not track the *identity* of individual
>> events. This in turn implies that, at a certain stage in the flow
>> migration process, DSW must wait for all pending releases (on the
>> migration source port, only) to be received from the application, to
>> ensure that no events pertaining to any of the to-be-migrated flows are
>> being processed.
>>
>> With this change, DSW starts making a distinction between forward and
>> new type events for credit allocation purposes. Only RTE_EVENT_OP_NEW
>> events need credits. All events marked as RTE_EVENT_OP_FORWARD must
>> have a corresponding dequeued event from a previous dequeue batch.
>>
>> Flow migration for flows on RTE_SCHED_TYPE_PARALLEL queues remains
>> unaffected by this change.
>>
>> A side-effect of the tweaked DSW migration logic is that the migration
>> latency is reduced, regardless of whether implicit release is enabled.
>>
>> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> 
> 
> 1) Update releases for PMD specific section for this new feature

Should the release note update be in the same patch, or in a separate one?

> 2) Fix CI issue as applicable
> 
> https://patches.dpdk.org/project/dpdk/patch/20240524192437.183960-1-mattias.ronnblom@ericsson.com/
> http://mails.dpdk.org/archives/test-report/2024-May/672848.html
> https://github.com/ovsrobot/dpdk/actions/runs/9229147658


* Re: [PATCH] event/dsw: support explicit release only mode
  2024-05-27 16:08     ` Mattias Rönnblom
@ 2024-05-27 17:17       ` Jerin Jacob
  0 siblings, 0 replies; 10+ messages in thread
From: Jerin Jacob @ 2024-05-27 17:17 UTC (permalink / raw)
  To: Mattias Rönnblom
  Cc: Mattias Rönnblom, Jerin Jacob, dev, bruce.richardson,
	Peter Nilsson J, Svante Järvstråt, Heng Wang

On Mon, May 27, 2024 at 9:38 PM Mattias Rönnblom <hofors@lysator.liu.se> wrote:
>
> On 2024-05-27 17:35, Jerin Jacob wrote:
> > On Sat, May 25, 2024 at 1:13 AM Mattias Rönnblom
> > <mattias.ronnblom@ericsson.com> wrote:
> >>
> >> Add the RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE capability to the
> >> DSW event device.
> >>
> >> This feature may be used by an EAL thread to pull more work from the
> >> work scheduler, without giving up the option to forward events
> >> originating from a previous dequeue batch. This in turn allows an EAL
> >> thread to be productive while waiting for a hardware accelerator to
> >> complete some operation.
> >>
> >> Prior to this change, DSW didn't make any distinction between
> >> RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_NEW type events, other than that
> >> new events would be backpressured earlier.
> >>
> >> After this change, DSW tracks the number of released events (i.e.,
> >> events of type RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_RELEASE) that have
> >> been enqueued.
> >>
> >> For efficiency reasons, DSW does not track the *identity* of individual
> >> events. This in turn implies that, at a certain stage in the flow
> >> migration process, DSW must wait for all pending releases (on the
> >> migration source port, only) to be received from the application, to
> >> ensure that no events pertaining to any of the to-be-migrated flows are
> >> being processed.
> >>
> >> With this change, DSW starts making a distinction between forward and
> >> new type events for credit allocation purposes. Only RTE_EVENT_OP_NEW
> >> events need credits. All events marked as RTE_EVENT_OP_FORWARD must
> >> have a corresponding dequeued event from a previous dequeue batch.
> >>
> >> Flow migration for flows on RTE_SCHED_TYPE_PARALLEL queues remains
> >> unaffected by this change.
> >>
> >> A side-effect of the tweaked DSW migration logic is that the migration
> >> latency is reduced, regardless of whether implicit release is enabled.
> >>
> >> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> >
> >
> > 1) Update releases for PMD specific section for this new feature
>
> Should the release note update be in the same patch, or a separate?

Same patch.


>
> > 2) Fix CI issue as applicable
> >
> > https://patches.dpdk.org/project/dpdk/patch/20240524192437.183960-1-mattias.ronnblom@ericsson.com/
> > http://mails.dpdk.org/archives/test-report/2024-May/672848.html
> > https://github.com/ovsrobot/dpdk/actions/runs/9229147658


* [PATCH v2] event/dsw: support explicit release only mode
  2024-05-24 19:24 ` [PATCH] " Mattias Rönnblom
  2024-05-27 15:35   ` Jerin Jacob
@ 2024-06-05 13:38   ` Mattias Rönnblom
  2024-06-06  9:59     ` Jerin Jacob
  2024-06-07 13:36     ` [PATCH v3] " Mattias Rönnblom
  1 sibling, 2 replies; 10+ messages in thread
From: Mattias Rönnblom @ 2024-06-05 13:38 UTC (permalink / raw)
  To: Jerin Jacob
  Cc: dev, hofors, bruce.richardson, Peter Nilsson J,
	Svante Järvstråt, Heng Wang, Mattias Rönnblom

Add the RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE capability to the
DSW event device.

This feature may be used by an EAL thread to pull more work from the
work scheduler, without giving up the option to forward events
originating from a previous dequeue batch. This in turn allows an EAL
thread to be productive while waiting for a hardware accelerator to
complete some operation.

Prior to this change, DSW didn't make any distinction between
RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_NEW type events, other than that
new events would be backpressured earlier.

After this change, DSW tracks the number of released events (i.e.,
events of type RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_RELEASE) that have
been enqueued.

For efficiency reasons, DSW does not track the identity of individual
events. This in turn implies that, at a certain stage in the flow
migration process, DSW must wait for all pending releases (on the
migration source port, only) to be received from the application, to
ensure that no events pertaining to any of the to-be-migrated flows are
being processed.
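
(A simplified model, for illustration only, of the per-port accounting
described above; the names mirror the patch, but this is not driver
code.)

#include <stdbool.h>
#include <stdint.h>

struct release_model {
	uint16_t pending_releases;
};

/* On dequeue: every dequeued event is expected to come back as
 * RTE_EVENT_OP_FORWARD or RTE_EVENT_OP_RELEASE. (With implicit release
 * enabled, the next dequeue call instead releases any remainder.)
 */
static void
model_dequeue(struct release_model *m, uint16_t num_dequeued)
{
	m->pending_releases += num_dequeued;
}

/* On enqueue: RTE_EVENT_OP_NEW events are not counted here; they only
 * affect credits.
 */
static void
model_enqueue(struct release_model *m, uint16_t num_forward,
	      uint16_t num_release)
{
	m->pending_releases -= num_forward + num_release;
}

/* An emigration that has reached the FINISH_PENDING state proceeds only
 * once pending_releases has dropped to zero on the source port.
 */
static bool
model_may_finish_migration(const struct release_model *m)
{
	return m->pending_releases == 0;
}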

With this change, DSW starts making a distinction between forward and
new type events for credit allocation purposes. Only RTE_EVENT_OP_NEW
events need credits. All events marked as RTE_EVENT_OP_FORWARD must
have a corresponding dequeued event from a previous dequeue batch.

Flow migration for flows on RTE_SCHED_TYPE_PARALLEL queues remains
unaffected by this change.

A side-effect of the tweaked DSW migration logic is that the migration
latency is reduced, regardless of whether implicit release is enabled.

Another side-effect is that migrated flows are now not processed
during any part of the migration procedure. An upside of this change
is that it reduces the load on the overloaded port. A downside is that
it introduces slightly more jitter for the migrated flows.

This patch contains various minor refactorings, improved
formatting, spelling fixes, and the removal of unnecessary memory
barriers.

v2:
  * Remove redundant memory barriers.
  * Discontinue processing of migrated flows throughout the migration
    procedure. This is a part of a fix to address a reordering issue
    v1 of this patch introduced.
  * Added entry in the release notes.
  * Fix spelling issues in commit message.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 doc/guides/rel_notes/release_24_07.rst |   7 +
 drivers/event/dsw/dsw_evdev.c          |   8 +-
 drivers/event/dsw/dsw_evdev.h          |   7 +-
 drivers/event/dsw/dsw_event.c          | 405 ++++++++++++++-----------
 4 files changed, 254 insertions(+), 173 deletions(-)

diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst
index a69f24cf99..706cc71212 100644
--- a/doc/guides/rel_notes/release_24_07.rst
+++ b/doc/guides/rel_notes/release_24_07.rst
@@ -24,6 +24,13 @@ DPDK Release 24.07
 New Features
 ------------
 
+* **Updated the DSW event device.**
+
+  * Added support for ``RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE``,
+    allowing applications to take on new tasks without having completed
+    (released) the previous event batch. This in turn facilitates DSW
+    use alongside high-latency look-aside hardware accelerators.
+
 .. This section should contain new features added in this release.
    Sample format:
 
diff --git a/drivers/event/dsw/dsw_evdev.c b/drivers/event/dsw/dsw_evdev.c
index ab0420b549..0dea1091e3 100644
--- a/drivers/event/dsw/dsw_evdev.c
+++ b/drivers/event/dsw/dsw_evdev.c
@@ -23,15 +23,20 @@ dsw_port_setup(struct rte_eventdev *dev, uint8_t port_id,
 	struct rte_event_ring *in_ring;
 	struct rte_ring *ctl_in_ring;
 	char ring_name[RTE_RING_NAMESIZE];
+	bool implicit_release;
 
 	port = &dsw->ports[port_id];
 
+	implicit_release =
+	    !(conf->event_port_cfg & RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
+
 	*port = (struct dsw_port) {
 		.id = port_id,
 		.dsw = dsw,
 		.dequeue_depth = conf->dequeue_depth,
 		.enqueue_depth = conf->enqueue_depth,
-		.new_event_threshold = conf->new_event_threshold
+		.new_event_threshold = conf->new_event_threshold,
+		.implicit_release = implicit_release
 	};
 
 	snprintf(ring_name, sizeof(ring_name), "dsw%d_p%u", dev->data->dev_id,
@@ -222,6 +227,7 @@ dsw_info_get(struct rte_eventdev *dev __rte_unused,
 		RTE_EVENT_DEV_CAP_ATOMIC |
 		RTE_EVENT_DEV_CAP_PARALLEL |
 		RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED|
+		RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE|
 		RTE_EVENT_DEV_CAP_NONSEQ_MODE|
 		RTE_EVENT_DEV_CAP_MULTIPLE_QUEUE_PORT|
 		RTE_EVENT_DEV_CAP_CARRY_FLOW_ID
diff --git a/drivers/event/dsw/dsw_evdev.h b/drivers/event/dsw/dsw_evdev.h
index 2018306265..c9bf4f8b6b 100644
--- a/drivers/event/dsw/dsw_evdev.h
+++ b/drivers/event/dsw/dsw_evdev.h
@@ -127,6 +127,7 @@ struct dsw_queue_flow {
 
 enum dsw_migration_state {
 	DSW_MIGRATION_STATE_IDLE,
+	DSW_MIGRATION_STATE_FINISH_PENDING,
 	DSW_MIGRATION_STATE_PAUSING,
 	DSW_MIGRATION_STATE_UNPAUSING
 };
@@ -148,6 +149,8 @@ struct __rte_cache_aligned dsw_port {
 
 	int32_t new_event_threshold;
 
+	bool implicit_release;
+
 	uint16_t pending_releases;
 
 	uint16_t next_parallel_flow_id;
@@ -255,8 +258,8 @@ struct dsw_evdev {
 	alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int32_t) credits_on_loan;
 };
 
-#define DSW_CTL_PAUS_REQ (0)
-#define DSW_CTL_UNPAUS_REQ (1)
+#define DSW_CTL_PAUSE_REQ (0)
+#define DSW_CTL_UNPAUSE_REQ (1)
 #define DSW_CTL_CFM (2)
 
 struct __rte_aligned(4) dsw_ctl_msg {
diff --git a/drivers/event/dsw/dsw_event.c b/drivers/event/dsw/dsw_event.c
index ca2b8e1032..d06a615fb8 100644
--- a/drivers/event/dsw/dsw_event.c
+++ b/drivers/event/dsw/dsw_event.c
@@ -275,7 +275,7 @@ dsw_port_add_paused_flows(struct dsw_port *port, struct dsw_queue_flow *qfs,
 
 static void
 dsw_port_remove_paused_flow(struct dsw_port *port,
-			    struct dsw_queue_flow *target_qf)
+			    const struct dsw_queue_flow *target_qf)
 {
 	uint16_t i;
 
@@ -302,6 +302,7 @@ dsw_port_remove_paused_flow(struct dsw_port *port,
 	DSW_LOG_DP_PORT(ERR, port->id,
 			"Failed to unpause queue_id %d flow_hash %d.\n",
 			target_qf->queue_id, target_qf->flow_hash);
+	RTE_VERIFY(0);
 }
 
 static void
@@ -602,7 +603,7 @@ dsw_port_transmit_buffered(struct dsw_evdev *dsw, struct dsw_port *source_port,
 		return;
 
 	/* The rings are dimensioned to fit all in-flight events (even
-	 * on a single ring), so looping will work.
+	 * on a single ring).
 	 */
 	rte_event_ring_enqueue_bulk(dest_port->in_ring, buffer, *buffer_len,
 				    NULL);
@@ -791,6 +792,7 @@ dsw_port_end_emigration(struct dsw_evdev *dsw, struct dsw_port *port,
 
 	if (port->emigration_targets_len == 0) {
 		port->migration_state = DSW_MIGRATION_STATE_IDLE;
+		port->emigration_targets_len = 0;
 		port->seen_events_len = 0;
 	}
 }
@@ -844,27 +846,35 @@ dsw_port_consider_emigration(struct dsw_evdev *dsw,
 
 	DSW_LOG_DP_PORT(DEBUG, source_port->id, "Considering emigration.\n");
 
+	/* For simplicity, postpone migration if there are still
+	 * events to consume in the in_buffer (from the last
+	 * emigration).
+	 */
+	if (source_port->in_buffer_len > 0) {
+		DSW_LOG_DP_PORT(DEBUG, source_port->id, "There are still "
+				"events in the input buffer.\n");
+		return;
+	}
+
+	if (source_port->migration_state != DSW_MIGRATION_STATE_IDLE) {
+		DSW_LOG_DP_PORT(DEBUG, source_port->id,
+				"Emigration already in progress.\n");
+		return;
+	}
+
 	if (seen_events_len < DSW_MAX_EVENTS_RECORDED) {
 		DSW_LOG_DP_PORT(DEBUG, source_port->id, "Not enough events "
 				"are recorded to allow for a migration.\n");
 		return;
 	}
 
-	/* A flow migration cannot be initiated if there are paused
-	 * events, since some/all of those events may be have been
-	 * produced as a result of processing the flow(s) selected for
-	 * migration. Moving such a flow would potentially introduced
-	 * reordering, since processing the migrated flow on the
-	 * receiving flow may commence before the to-be-enqueued-to
-
-	 * flows are unpaused, leading to paused events on the second
-	 * port as well, destined for the same paused flow(s). When
-	 * those flows are unpaused, the resulting events are
-	 * delivered the owning port in an undefined order.
+	/* Postpone migration considering in case paused events exists, since
+	 * such events may prevent the migration procedure from completing,
+	 * leading to wasted CPU cycles (e.g., sorting queue flows).
 	 */
 	if (source_port->paused_events_len > 0) {
-		DSW_LOG_DP_PORT(DEBUG, source_port->id, "There are "
-				"events in the paus buffer.\n");
+		DSW_LOG_DP_PORT(DEBUG, source_port->id, "Paused events on "
+				"port. Postponing any migrations.\n");
 		return;
 	}
 
@@ -874,23 +884,7 @@ dsw_port_consider_emigration(struct dsw_evdev *dsw,
 	 */
 	source_port->next_emigration = now +
 		source_port->migration_interval / 2 +
-		rte_rand() % source_port->migration_interval;
-
-	if (source_port->migration_state != DSW_MIGRATION_STATE_IDLE) {
-		DSW_LOG_DP_PORT(DEBUG, source_port->id,
-				"Emigration already in progress.\n");
-		return;
-	}
-
-	/* For simplicity, avoid migration in the unlikely case there
-	 * is still events to consume in the in_buffer (from the last
-	 * emigration).
-	 */
-	if (source_port->in_buffer_len > 0) {
-		DSW_LOG_DP_PORT(DEBUG, source_port->id, "There are still "
-				"events in the input buffer.\n");
-		return;
-	}
+		rte_rand_max(source_port->migration_interval);
 
 	source_port_load =
 		rte_atomic_load_explicit(&source_port->load,
@@ -936,7 +930,7 @@ dsw_port_consider_emigration(struct dsw_evdev *dsw,
 	if (source_port->emigration_targets_len == 0)
 		return;
 
-	source_port->migration_state = DSW_MIGRATION_STATE_PAUSING;
+	source_port->migration_state = DSW_MIGRATION_STATE_FINISH_PENDING;
 	source_port->emigration_start = rte_get_timer_cycles();
 
 	/* No need to go through the whole pause procedure for
@@ -944,24 +938,73 @@ dsw_port_consider_emigration(struct dsw_evdev *dsw,
 	 * be maintained.
 	 */
 	dsw_port_move_parallel_flows(dsw, source_port);
+}
 
-	/* All flows were on PARALLEL queues. */
-	if (source_port->migration_state == DSW_MIGRATION_STATE_IDLE)
-		return;
+static void
+dsw_port_abort_migration(struct dsw_port *source_port)
+{
+	RTE_ASSERT(port->in_buffer_start == 0);
+	RTE_ASSERT(port->in_buffer_len == 0);
 
-	/* There might be 'loopback' events already scheduled in the
-	 * output buffers.
+	/* Putting the stashed events in the in_buffer makes sure they
+	 * are processed before any events on the in_ring, to avoid
+	 * reordering.
 	 */
-	dsw_port_flush_out_buffers(dsw, source_port);
+	rte_memcpy(source_port->in_buffer, source_port->emigrating_events,
+		 source_port->emigrating_events_len * sizeof(struct rte_event));
+	source_port->in_buffer_len = source_port->emigrating_events_len;
+	source_port->emigrating_events_len = 0;
+
+	source_port->emigration_targets_len = 0;
+
+	source_port->migration_state = DSW_MIGRATION_STATE_IDLE;
+}
+
+static void
+dsw_port_continue_emigration(struct dsw_evdev *dsw,
+			     struct dsw_port *source_port)
+{
+	/* A flow migration cannot be completed if there are paused
+	 * events, since some/all of those events may be have been
+	 * produced as a result of processing the flow(s) selected for
+	 * migration. Moving such a flow would potentially introduced
+	 * reordering, since processing the migrated flow on the
+	 * receiving flow may commence before the to-be-enqueued-to
+	 * flows are unpaused, leading to paused events on the second
+	 * port as well, destined for the same paused flow(s). When
+	 * those flows are unpaused, the resulting events are
+	 * delivered the owning port in an undefined order.
+	 *
+	 * Waiting for the events to be unpaused could lead to a
+	 * deadlock, where two ports are both waiting for the other to
+	 * unpause.
+	 */
+	if (source_port->paused_events_len > 0) {
+		DSW_LOG_DP_PORT(DEBUG, source_port->id, "There are events in "
+				"the pause buffer. Aborting migration.\n");
+		dsw_port_abort_migration(source_port);
+		return;
+	}
 
 	dsw_port_add_paused_flows(source_port,
 				  source_port->emigration_target_qfs,
 				  source_port->emigration_targets_len);
 
-	dsw_port_ctl_broadcast(dsw, source_port, DSW_CTL_PAUS_REQ,
+	dsw_port_ctl_broadcast(dsw, source_port, DSW_CTL_PAUSE_REQ,
 			       source_port->emigration_target_qfs,
 			       source_port->emigration_targets_len);
 	source_port->cfm_cnt = 0;
+
+	source_port->migration_state = DSW_MIGRATION_STATE_PAUSING;
+}
+
+static void
+dsw_port_try_finish_pending(struct dsw_evdev *dsw, struct dsw_port *source_port)
+{
+	if (unlikely(source_port->migration_state ==
+		     DSW_MIGRATION_STATE_FINISH_PENDING &&
+		     source_port->pending_releases == 0))
+		dsw_port_continue_emigration(dsw, source_port);
 }
 
 static void
@@ -982,8 +1025,6 @@ dsw_port_handle_unpause_flows(struct dsw_evdev *dsw, struct dsw_port *port,
 
 	dsw_port_remove_paused_flows(port, paused_qfs, qfs_len);
 
-	rte_smp_rmb();
-
 	dsw_port_ctl_enqueue(&dsw->ports[originating_port_id], &cfm);
 
 	for (i = 0; i < qfs_len; i++) {
@@ -1008,45 +1049,40 @@ dsw_port_buffer_in_buffer(struct dsw_port *port,
 }
 
 static void
-dsw_port_forward_emigrated_event(struct dsw_evdev *dsw,
-				 struct dsw_port *source_port,
-				 struct rte_event *event)
+dsw_port_stash_migrating_event(struct dsw_port *port,
+			       const struct rte_event *event)
+{
+	port->emigrating_events[port->emigrating_events_len] = *event;
+	port->emigrating_events_len++;
+}
+
+static void
+dsw_port_stash_any_migrating_events(struct dsw_port *port,
+				    struct rte_event *events,
+				    uint16_t *num)
 {
 	uint16_t i;
+	uint16_t offset = 0;
 
-	for (i = 0; i < source_port->emigration_targets_len; i++) {
-		struct dsw_queue_flow *qf =
-			&source_port->emigration_target_qfs[i];
-		uint8_t dest_port_id =
-			source_port->emigration_target_port_ids[i];
-		struct dsw_port *dest_port = &dsw->ports[dest_port_id];
+	for (i = 0; i < *num; i++) {
+		uint16_t flow_hash;
+		struct rte_event *in_event = &events[i];
 
-		if (event->queue_id == qf->queue_id &&
-		    dsw_flow_id_hash(event->flow_id) == qf->flow_hash) {
-			/* No need to care about bursting forwarded
-			 * events (to the destination port's in_ring),
-			 * since migration doesn't happen very often,
-			 * and also the majority of the dequeued
-			 * events will likely *not* be forwarded.
-			 */
-			while (rte_event_ring_enqueue_burst(dest_port->in_ring,
-							    event, 1,
-							    NULL) != 1)
-				rte_pause();
-			return;
+		flow_hash = dsw_flow_id_hash(in_event->flow_id);
+
+		if (unlikely(dsw_port_is_flow_migrating(port,
+							in_event->queue_id,
+							flow_hash))) {
+			dsw_port_stash_migrating_event(port, in_event);
+			offset++;
+		} else if (offset > 0) {
+			struct rte_event *out_event = &events[i - offset];
+			rte_memcpy(out_event, in_event,
+				   sizeof(struct rte_event));
 		}
 	}
 
-	/* Event did not belong to the emigrated flows */
-	dsw_port_buffer_in_buffer(source_port, event);
-}
-
-static void
-dsw_port_stash_migrating_event(struct dsw_port *port,
-			       const struct rte_event *event)
-{
-	port->emigrating_events[port->emigrating_events_len] = *event;
-	port->emigrating_events_len++;
+	*num -= offset;
 }
 
 #define DRAIN_DEQUEUE_BURST_SIZE (32)
@@ -1054,28 +1090,21 @@ dsw_port_stash_migrating_event(struct dsw_port *port,
 static void
 dsw_port_drain_in_ring(struct dsw_port *source_port)
 {
-	uint16_t num_events;
-	uint16_t dequeued;
-
-	/* Control ring message should been seen before the ring count
-	 * is read on the port's in_ring.
-	 */
-	rte_smp_rmb();
-
-	num_events = rte_event_ring_count(source_port->in_ring);
-
-	for (dequeued = 0; dequeued < num_events; ) {
-		uint16_t burst_size = RTE_MIN(DRAIN_DEQUEUE_BURST_SIZE,
-					      num_events - dequeued);
-		struct rte_event events[burst_size];
-		uint16_t len;
+	for (;;) {
+		struct rte_event events[DRAIN_DEQUEUE_BURST_SIZE];
+		uint16_t n;
 		uint16_t i;
+		uint16_t available;
+
+		n = rte_event_ring_dequeue_burst(source_port->in_ring,
+						 events,
+						 DRAIN_DEQUEUE_BURST_SIZE,
+						 &available);
 
-		len = rte_event_ring_dequeue_burst(source_port->in_ring,
-						   events, burst_size,
-						   NULL);
+		if (n == 0 && available == 0)
+			break;
 
-		for (i = 0; i < len; i++) {
+		for (i = 0; i < n; i++) {
 			struct rte_event *event = &events[i];
 			uint16_t flow_hash;
 
@@ -1089,9 +1118,41 @@ dsw_port_drain_in_ring(struct dsw_port *source_port)
 			else
 				dsw_port_buffer_in_buffer(source_port, event);
 		}
+	}
+}
 
-		dequeued += len;
+static void
+dsw_port_forward_emigrated_event(struct dsw_evdev *dsw,
+				 struct dsw_port *source_port,
+				 struct rte_event *event)
+{
+	uint16_t i;
+
+	for (i = 0; i < source_port->emigration_targets_len; i++) {
+		struct dsw_queue_flow *qf =
+			&source_port->emigration_target_qfs[i];
+		uint8_t dest_port_id =
+			source_port->emigration_target_port_ids[i];
+		struct dsw_port *dest_port = &dsw->ports[dest_port_id];
+
+		if (event->queue_id == qf->queue_id &&
+		    dsw_flow_id_hash(event->flow_id) == qf->flow_hash) {
+			/* No need to care about bursting forwarded
+			 * events (to the destination port's in_ring),
+			 * since migration doesn't happen very often,
+			 * and also the majority of the dequeued
+			 * events will likely *not* be forwarded.
+			 */
+			while (rte_event_ring_enqueue_burst(dest_port->in_ring,
+							    event, 1,
+							    NULL) != 1)
+				rte_pause();
+			return;
+		}
 	}
+
+	/* Event did not belong to the emigrated flows */
+	dsw_port_buffer_in_buffer(source_port, event);
 }
 
 static void
@@ -1114,6 +1175,9 @@ dsw_port_move_emigrating_flows(struct dsw_evdev *dsw,
 {
 	uint8_t i;
 
+	/* There may be events lingering in the output buffer from
+	 * prior to the pause took effect.
+	 */
 	dsw_port_flush_out_buffers(dsw, source_port);
 
 	for (i = 0; i < source_port->emigration_targets_len; i++) {
@@ -1137,12 +1201,21 @@ dsw_port_move_emigrating_flows(struct dsw_evdev *dsw,
 
 	dsw_port_flush_no_longer_paused_events(dsw, source_port);
 
+	/* Processing migrating flows during migration may have
+	 * produced events to paused flows, including the flows which
+	 * were being migrated. Flushing the output buffers before
+	 * unpausing the flows on other ports assures that such events
+	 * are seen *before* any events produced by processing the
+	 * migrating flows on the new port.
+	 */
+	dsw_port_flush_out_buffers(dsw, source_port);
+
 	/* Flow table update and migration destination port's enqueues
 	 * must be seen before the control message.
 	 */
 	rte_smp_wmb();
 
-	dsw_port_ctl_broadcast(dsw, source_port, DSW_CTL_UNPAUS_REQ,
+	dsw_port_ctl_broadcast(dsw, source_port, DSW_CTL_UNPAUSE_REQ,
 			       source_port->emigration_target_qfs,
 			       source_port->emigration_targets_len);
 	source_port->cfm_cnt = 0;
@@ -1154,7 +1227,7 @@ dsw_port_handle_confirm(struct dsw_evdev *dsw, struct dsw_port *port)
 {
 	port->cfm_cnt++;
 
-	if (port->cfm_cnt == (dsw->num_ports-1)) {
+	if (port->cfm_cnt == (dsw->num_ports - 1)) {
 		switch (port->migration_state) {
 		case DSW_MIGRATION_STATE_PAUSING:
 			dsw_port_move_emigrating_flows(dsw, port);
@@ -1164,7 +1237,7 @@ dsw_port_handle_confirm(struct dsw_evdev *dsw, struct dsw_port *port)
 						RTE_SCHED_TYPE_ATOMIC);
 			break;
 		default:
-			RTE_ASSERT(0);
+			RTE_VERIFY(0);
 			break;
 		}
 	}
@@ -1177,12 +1250,12 @@ dsw_port_ctl_process(struct dsw_evdev *dsw, struct dsw_port *port)
 
 	if (dsw_port_ctl_dequeue(port, &msg) == 0) {
 		switch (msg.type) {
-		case DSW_CTL_PAUS_REQ:
+		case DSW_CTL_PAUSE_REQ:
 			dsw_port_handle_pause_flows(dsw, port,
 						    msg.originating_port_id,
 						    msg.qfs, msg.qfs_len);
 			break;
-		case DSW_CTL_UNPAUS_REQ:
+		case DSW_CTL_UNPAUSE_REQ:
 			dsw_port_handle_unpause_flows(dsw, port,
 						      msg.originating_port_id,
 						      msg.qfs, msg.qfs_len);
@@ -1203,19 +1276,18 @@ dsw_port_note_op(struct dsw_port *port, uint16_t num_events)
 static void
 dsw_port_bg_process(struct dsw_evdev *dsw, struct dsw_port *port)
 {
-	/* For simplicity (in the migration logic), avoid all
-	 * background processing in case event processing is in
-	 * progress.
-	 */
-	if (port->pending_releases > 0)
-		return;
-
 	/* Polling the control ring is relatively inexpensive, and
 	 * polling it often helps bringing down migration latency, so
 	 * do this for every iteration.
 	 */
 	dsw_port_ctl_process(dsw, port);
 
+	/* Always check if a migration is waiting for pending releases
+	 * to arrive, to minimize the amount of time dequeuing events
+	 * from the port is disabled.
+	 */
+	dsw_port_try_finish_pending(dsw, port);
+
 	/* To avoid considering migration and flushing output buffers
 	 * on every dequeue/enqueue call, the scheduler only performs
 	 * such 'background' tasks every nth
@@ -1260,8 +1332,8 @@ static __rte_always_inline uint16_t
 dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 				const struct rte_event events[],
 				uint16_t events_len, bool op_types_known,
-				uint16_t num_new, uint16_t num_release,
-				uint16_t num_non_release)
+				uint16_t num_new, uint16_t num_forward,
+				uint16_t num_release)
 {
 	struct dsw_evdev *dsw = source_port->dsw;
 	bool enough_credits;
@@ -1295,14 +1367,14 @@ dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 	if (!op_types_known)
 		for (i = 0; i < events_len; i++) {
 			switch (events[i].op) {
-			case RTE_EVENT_OP_RELEASE:
-				num_release++;
-				break;
 			case RTE_EVENT_OP_NEW:
 				num_new++;
-				/* Falls through. */
-			default:
-				num_non_release++;
+				break;
+			case RTE_EVENT_OP_FORWARD:
+				num_forward++;
+				break;
+			case RTE_EVENT_OP_RELEASE:
+				num_release++;
 				break;
 			}
 		}
@@ -1318,15 +1390,20 @@ dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 		     source_port->new_event_threshold))
 		return 0;
 
-	enough_credits = dsw_port_acquire_credits(dsw, source_port,
-						  num_non_release);
+	enough_credits = dsw_port_acquire_credits(dsw, source_port, num_new);
 	if (unlikely(!enough_credits))
 		return 0;
 
-	source_port->pending_releases -= num_release;
+	dsw_port_return_credits(dsw, source_port, num_release);
+
+	/* This may seem harsh, but it's important for an application
+	 * to get early feedback for cases where it fails to stick to
+	 * the API contract.
+	 */
+	RTE_VERIFY(num_forward + num_release <= source_port->pending_releases);
+	source_port->pending_releases -= (num_forward + num_release);
 
-	dsw_port_enqueue_stats(source_port, num_new,
-			       num_non_release-num_new, num_release);
+	dsw_port_enqueue_stats(source_port, num_new, num_forward, num_release);
 
 	for (i = 0; i < events_len; i++) {
 		const struct rte_event *event = &events[i];
@@ -1338,9 +1415,9 @@ dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 	}
 
 	DSW_LOG_DP_PORT(DEBUG, source_port->id, "%d non-release events "
-			"accepted.\n", num_non_release);
+			"accepted.\n", num_new + num_forward);
 
-	return (num_non_release + num_release);
+	return (num_new + num_forward + num_release);
 }
 
 uint16_t
@@ -1367,7 +1444,7 @@ dsw_event_enqueue_new_burst(void *port, const struct rte_event events[],
 
 	return dsw_event_enqueue_burst_generic(source_port, events,
 					       events_len, true, events_len,
-					       0, events_len);
+					       0, 0);
 }
 
 uint16_t
@@ -1380,8 +1457,8 @@ dsw_event_enqueue_forward_burst(void *port, const struct rte_event events[],
 		events_len = source_port->enqueue_depth;
 
 	return dsw_event_enqueue_burst_generic(source_port, events,
-					       events_len, true, 0, 0,
-					       events_len);
+					       events_len, true, 0,
+					       events_len, 0);
 }
 
 uint16_t
@@ -1435,8 +1512,17 @@ static uint16_t
 dsw_port_dequeue_burst(struct dsw_port *port, struct rte_event *events,
 		       uint16_t num)
 {
-	if (unlikely(port->in_buffer_len > 0)) {
-		uint16_t dequeued = RTE_MIN(num, port->in_buffer_len);
+	enum dsw_migration_state state = port->migration_state;
+	uint16_t dequeued;
+
+	if (unlikely(state == DSW_MIGRATION_STATE_FINISH_PENDING))
+		/* Do not produce new items of work - only finish
+		 * outstanding (unreleased) events, to allow the
+		 * migration procedure to continue.
+		 */
+		dequeued = 0;
+	else if (unlikely(port->in_buffer_len > 0)) {
+		dequeued = RTE_MIN(num, port->in_buffer_len);
 
 		rte_memcpy(events, &port->in_buffer[port->in_buffer_start],
 			   dequeued * sizeof(struct rte_event));
@@ -1446,43 +1532,24 @@ dsw_port_dequeue_burst(struct dsw_port *port, struct rte_event *events,
 
 		if (port->in_buffer_len == 0)
 			port->in_buffer_start = 0;
-
-		return dequeued;
+	} else {
+		dequeued = rte_event_ring_dequeue_burst(port->in_ring,
+							events, num, NULL);
+
+		/* Stash incoming events belonging to migrating flows,
+		 * to avoid having to deal with forwarded events to
+		 * flows which are also in the process of being
+		 * migrated. A failure to do so leads to reordering,
+		 * since paused events on the source port may be
+		 * flushed after paused events on the migration
+		 * destination port.
+		 */
+		if (unlikely(state == DSW_MIGRATION_STATE_PAUSING))
+			dsw_port_stash_any_migrating_events(port, events,
+							    &dequeued);
 	}
 
-	return rte_event_ring_dequeue_burst(port->in_ring, events, num, NULL);
-}
-
-static void
-dsw_port_stash_migrating_events(struct dsw_port *port,
-				struct rte_event *events, uint16_t *num)
-{
-	uint16_t i;
-
-	/* The assumption here - performance-wise - is that events
-	 * belonging to migrating flows are relatively rare.
-	 */
-	for (i = 0; i < (*num); ) {
-		struct rte_event *event = &events[i];
-		uint16_t flow_hash;
-
-		flow_hash = dsw_flow_id_hash(event->flow_id);
-
-		if (unlikely(dsw_port_is_flow_migrating(port, event->queue_id,
-							flow_hash))) {
-			uint16_t left;
-
-			dsw_port_stash_migrating_event(port, event);
-
-			(*num)--;
-			left = *num - i;
-
-			if (left > 0)
-				memmove(event, event + 1,
-					left * sizeof(struct rte_event));
-		} else
-			i++;
-	}
+	return dequeued;
 }
 
 uint16_t
@@ -1493,7 +1560,12 @@ dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num,
 	struct dsw_evdev *dsw = source_port->dsw;
 	uint16_t dequeued;
 
-	source_port->pending_releases = 0;
+	if (source_port->implicit_release) {
+		dsw_port_return_credits(dsw, port,
+					source_port->pending_releases);
+
+		source_port->pending_releases = 0;
+	}
 
 	dsw_port_bg_process(dsw, source_port);
 
@@ -1502,12 +1574,7 @@ dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num,
 
 	dequeued = dsw_port_dequeue_burst(source_port, events, num);
 
-	if (unlikely(source_port->migration_state ==
-		     DSW_MIGRATION_STATE_PAUSING))
-		dsw_port_stash_migrating_events(source_port, events,
-						&dequeued);
-
-	source_port->pending_releases = dequeued;
+	source_port->pending_releases += dequeued;
 
 	dsw_port_load_record(source_port, dequeued);
 
@@ -1517,8 +1584,6 @@ dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num,
 		DSW_LOG_DP_PORT(DEBUG, source_port->id, "Dequeued %d events.\n",
 				dequeued);
 
-		dsw_port_return_credits(dsw, source_port, dequeued);
-
 		/* One potential optimization one might think of is to
 		 * add a migration state (prior to 'pausing'), and
 		 * only record seen events when the port is in this
-- 
2.34.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] event/dsw: support explicit release only mode
  2024-06-05 13:38   ` [PATCH v2] " Mattias Rönnblom
@ 2024-06-06  9:59     ` Jerin Jacob
  2024-06-07 13:36     ` [PATCH v3] " Mattias Rönnblom
  1 sibling, 0 replies; 10+ messages in thread
From: Jerin Jacob @ 2024-06-06  9:59 UTC (permalink / raw)
  To: Mattias Rönnblom
  Cc: Jerin Jacob, dev, hofors, bruce.richardson, Peter Nilsson J,
	Svante Järvstråt, Heng Wang

On Wed, Jun 5, 2024 at 7:43 PM Mattias Rönnblom
<mattias.ronnblom@ericsson.com> wrote:
>
> Add the RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE capability to the
> DSW event device.
>
> This feature may be used by an EAL thread to pull more work from the
> work scheduler, without giving up the option to forward events
> originating from a previous dequeue batch. This in turn allows an EAL
> thread to be productive while waiting for a hardware accelerator to
> complete some operation.
>
> Prior to this change, DSW didn't make any distinction between
> RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_NEW type events, other than that
> new events would be backpressured earlier.
>
> After this change, DSW tracks the number of released events (i.e.,
> events of type RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_RELEASE) that have
> been enqueued.
>
> For efficiency reasons, DSW does not track the identity of individual
> events. This in turn implies that at a certain stage in the flow
> migration process, DSW must wait for all pending releases (on the
> migration source port only) to be received from the application, to
> assure that no event pertaining to any of the to-be-migrated flows is
> being processed.
>
> With this change, DSW starts making a distinction between forward and
> new type events for credit allocation purposes. Only RTE_EVENT_OP_NEW
> events need credits. All events marked as RTE_EVENT_OP_FORWARD must
> have a corresponding dequeued event from a previous dequeue batch.
>
> Flow migration for flows on RTE_SCHED_TYPE_PARALLEL queues remains
> unaffected by this change.
>
> A side-effect of the tweaked DSW migration logic is that the migration
> latency is reduced, regardless of whether implicit release is enabled or not.
>
> Another side-effect is that migrated flows are now not processed
> during any part of the migration procedure. An upside of this change
> is that it reduces the load on the overloaded port. A downside is
> that it introduces slightly more jitter for the migrated flows.
>
> This patch contains various minor refactorings, improved
> formatting, spelling fixes, and the removal of unnecessary memory
> barriers.
>
> v2:
>   * Remove redundant memory barriers.
>   * Discontinue processing of migrated flows throughout the migration
>     procedure. This is a part of a fix to address a reordering issue
>     v1 of this patch introduced.
>   * Added entry in the release notes.
>   * Fix spelling issues in commit message.
>
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>

meson --werror -Dc_args='-DRTE_ENABLE_ASSERT' -Denable_docs=true build


[1602/2784] Generating drivers/rte_net_af_xdp.pmd.c with a custom command
[1603/2784] Generating drivers/rte_net_ark.pmd.c with a custom command
[1604/2784] Generating drivers/rte_net_axgbe.pmd.c with a custom command
[1605/2784] Compiling C object
drivers/libtmp_rte_event_dsw.a.p/event_dsw_dsw_event.c.o
FAILED: drivers/libtmp_rte_event_dsw.a.p/event_dsw_dsw_event.c.o
ccache cc -Idrivers/libtmp_rte_event_dsw.a.p -Idrivers -I../drivers
-Idrivers/event/dsw -I../drivers/event/dsw -Ilib/eventdev
-I../lib/eventdev -I. -I.. -Iconfig -I../config -Ilib/eal/include
-I../lib/eal/include -Ilib/eal/linux/include
-I../lib/eal/linux/include -Ilib/eal/x86/include
-I../lib/eal/x86/include -Ilib/eal/common -I../lib/eal/common
-Ilib/eal -I../lib/eal -Ilib/kvargs -I../lib/kvargs -Ilib/log
-I../lib/log -Ilib/metrics -I../lib/metrics -Ilib/telemetry
-I../lib/telemetry -Ilib/ring -I../lib/ring -Ilib/ethdev
-I../lib/ethdev -Ilib/net -I../lib/net -Ilib/mbuf -I../lib/mbuf
-Ilib/mempool -I../lib/mempool -Ilib/meter -I../lib/meter -Ilib/hash
-I../lib/hash -Ilib/rcu -I../lib/rcu -Ilib/timer -I../lib/timer
-Ilib/cryptodev -I../lib/cryptodev -Ilib/dmadev -I../lib/dmadev
-Idrivers/bus/vdev -I../drivers/bus/vdev -fdiagnostics-color=always
-D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -Wextra -Werror -std=c11
-O3 -include rte_config.h -Wcast-qual -Wdeprecated -Wformat
-Wformat-nonliteral -Wformat-security -Wmissing-declarations
-Wmissing-prototypes -Wnested-externs -Wold-style-definition
-Wpointer-arith -Wsign-compare -Wstrict-prototypes -Wundef
-Wwrite-strings -Wno-address-of-packed-member -Wno-packed-not-aligned
-Wno-missing-field-initializers -Wno-zero-length-bounds -D_GNU_SOURCE
-DRTE_ENABLE_ASSERT -fPIC -march=native -mrtm -DALLOW_EXPERIMENTAL_API
-DALLOW_INTERNAL_API -Wno-format-truncation -Wno-format-nonliteral
-DRTE_LOG_DEFAULT_LOGTYPE=pmd.event.dsw -MD -MQ
drivers/libtmp_rte_event_dsw.a.p/event_dsw_dsw_event.c.o -MF
drivers/libtmp_rte_event_dsw.a.p/event_dsw_dsw_event.c.o.d -o
drivers/libtmp_rte_event_dsw.a.p/event_dsw_dsw_event.c.o -c
../drivers/event/dsw/dsw_event.c
In file included from ../lib/eal/include/rte_debug.h:18,
                 from ../lib/eal/include/rte_bitops.h:20,
                 from ../lib/eal/include/rte_memory.h:22,
                 from ../lib/eal/include/rte_malloc.h:16,
                 from ../lib/eventdev/eventdev_pmd.h:27,
                 from ../drivers/event/dsw/dsw_evdev.h:8,
                 from ../drivers/event/dsw/dsw_event.c:5:
../drivers/event/dsw/dsw_event.c: In function ‘dsw_port_abort_migration’:
../drivers/event/dsw/dsw_event.c:946:20: error: ‘port’ undeclared
(first use in this function)
  946 |         RTE_ASSERT(port->in_buffer_start == 0);
      |                    ^~~~
../lib/eal/include/rte_branch_prediction.h:47:45: note: in definition
of macro ‘unlikely’
   47 | #define unlikely(x)     __builtin_expect(!!(x), 0)
      |                                             ^
../lib/eal/include/rte_debug.h:47:25: note: in expansion of macro ‘RTE_VERIFY’
   47 | #define RTE_ASSERT(exp) RTE_VERIFY(exp)
      |                         ^~~~~~~~~~
../drivers/event/dsw/dsw_event.c:946:9: note: in expansion of macro ‘RTE_ASSERT’
  946 |         RTE_ASSERT(port->in_buffer_start == 0);
      |         ^~~~~~~~~~
../drivers/event/dsw/dsw_event.c:946:20: note: each undeclared
identifier is reported only once for each function it appears in
  946 |         RTE_ASSERT(port->in_buffer_start == 0);
      |                    ^~~~
../lib/eal/include/rte_branch_prediction.h:47:45: note: in definition
of macro ‘unlikely’
   47 | #define unlikely(x)     __builtin_expect(!!(x), 0)
      |                                             ^
../lib/eal/include/rte_debug.h:47:25: note: in expansion of macro ‘RTE_VERIFY’
   47 | #define RTE_ASSERT(exp) RTE_VERIFY(exp)
      |                         ^~~~~~~~~~
../drivers/event/dsw/dsw_event.c:946:9: note: in expansion of macro ‘RTE_ASSERT’
  946 |         RTE_ASSERT(port->in_buffer_start == 0);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v3] event/dsw: support explicit release only mode
  2024-06-05 13:38   ` [PATCH v2] " Mattias Rönnblom
  2024-06-06  9:59     ` Jerin Jacob
@ 2024-06-07 13:36     ` Mattias Rönnblom
  2024-06-08  6:14       ` Jerin Jacob
  1 sibling, 1 reply; 10+ messages in thread
From: Mattias Rönnblom @ 2024-06-07 13:36 UTC (permalink / raw)
  To: Jerin Jacob
  Cc: dev, hofors, bruce.richardson, Peter Nilsson J,
	Svante Järvstråt, Heng Wang, Mattias Rönnblom

Add the RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE capability to the
DSW event device.

This feature may be used by an EAL thread to pull more work from the
work scheduler, without giving up the option to forward events
originating from a previous dequeue batch. This in turn allows an EAL
thread to be productive while waiting for a hardware accelerator to
complete some operation.
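
To illustrate the intended usage, here is a minimal application-side
sketch (not part of the patch) of a port set up for explicit release
and a worker loop using it. The function names, the MAX_OUTSTANDING
bound and the idea of deferring forwarding until an accelerator has
completed are assumptions made for the example; the eventdev calls and
flags themselves are standard API.

#include <string.h>

#include <rte_debug.h>
#include <rte_eventdev.h>

#define MAX_OUTSTANDING 64

static void
setup_explicit_release_port(uint8_t dev_id, uint8_t port_id)
{
	struct rte_event_port_conf conf;

	if (rte_event_port_default_conf_get(dev_id, port_id, &conf) != 0)
		rte_panic("Failed to retrieve default port config\n");

	/* Ask the event device not to implicitly release events on
	 * the next dequeue call.
	 */
	conf.event_port_cfg |= RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL;

	if (rte_event_port_setup(dev_id, port_id, &conf) != 0)
		rte_panic("Failed to set up port\n");
}

static void
worker(uint8_t dev_id, uint8_t port_id, uint8_t next_queue_id)
{
	struct rte_event outstanding[MAX_OUTSTANDING];
	uint16_t num_outstanding = 0;

	for (;;) {
		struct rte_event ev;

		/* More work may be pulled from the scheduler even
		 * though events from earlier bursts have not yet been
		 * forwarded or released.
		 */
		if (num_outstanding < MAX_OUTSTANDING &&
		    rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0) == 1)
			outstanding[num_outstanding++] = ev;

		/* In a real application, the event would be handed off
		 * to an accelerator here, and forwarded only upon
		 * completion. This sketch forwards the oldest
		 * outstanding event immediately.
		 */
		if (num_outstanding > 0) {
			struct rte_event *done = &outstanding[0];

			done->op = RTE_EVENT_OP_FORWARD;
			done->queue_id = next_queue_id;

			if (rte_event_enqueue_burst(dev_id, port_id,
						    done, 1) == 1) {
				num_outstanding--;
				memmove(&outstanding[0], &outstanding[1],
					num_outstanding *
					sizeof(struct rte_event));
			}
		}
	}
}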

Prior to this change, DSW didn't make any distinction between
RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_NEW type events, other than that
new events would be backpressured earlier.

After this change, DSW tracks the number of released events (i.e.,
events of type RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_RELEASE) that have
been enqueued.

For efficiency reasons, DSW does not track the identity of individual
events. This in turn implies that at a certain stage in the flow
migration process, DSW must wait for all pending releases (on the
migration source port only) to be received from the application, to
assure that no event pertaining to any of the to-be-migrated flows is
being processed.
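
Condensed into driver terms (using the internal names introduced in
the diff below; a simplified view, not the literal code), the wait
works like this: when an emigration is decided upon, the source port
enters DSW_MIGRATION_STATE_FINISH_PENDING, in which
dsw_port_dequeue_burst() hands out no new events, so pending_releases
can only shrink. Each background-processing iteration then checks:

	if (source_port->migration_state ==
	    DSW_MIGRATION_STATE_FINISH_PENDING &&
	    source_port->pending_releases == 0)
		dsw_port_continue_emigration(dsw, source_port);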

With this change, DSW starts making a distinction between forward and
new type events for credit allocation purposes. Only RTE_EVENT_OP_NEW
events need credits. All events marked as RTE_EVENT_OP_FORWARD must
have a corresponding dequeued event from a previous dequeue batch.
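
In driver terms, the enqueue-side accounting boils down to the
following (a simplified sketch; the complete logic lives in
dsw_event_enqueue_burst_generic() in the diff below):

	/* Only events injected into the scheduler need credits. */
	if (!dsw_port_acquire_credits(dsw, source_port, num_new))
		return 0;

	/* Released events leave the scheduler, so their credits are
	 * handed back immediately.
	 */
	dsw_port_return_credits(dsw, source_port, num_release);

	/* Both FORWARD and RELEASE complete an event dequeued in an
	 * earlier batch.
	 */
	RTE_VERIFY(num_forward + num_release <= source_port->pending_releases);
	source_port->pending_releases -= (num_forward + num_release);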

Flow migration for flows on RTE_SCHED_TYPE_PARALLEL queues remains
unaffected by this change.

A side-effect of the tweaked DSW migration logic is that the migration
latency is reduced, regardless of whether implicit release is enabled or not.

Another side-effect is that migrated flows are now not processed
during any part of the migration procedure. An upside of this change
is that it reduces the load on the overloaded port. A downside is that
it introduces slightly more jitter for the migrated flows.

This patch contains various minor refactorings, improved
formatting, spelling fixes, and the removal of unnecessary memory
barriers.

v3:
 * Fix broken RTE_ASSERT()s. (Jerin Jacob)

v2:
  * Remove redundant memory barriers.
  * Discontinue processing of migrated flows throughout the migration
    procedure. This is a part of a fix to address a reordering issue
    v1 of this patch introduced.
  * Added entry in the release notes.
  * Fix spelling issues in commit message.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 doc/guides/rel_notes/release_24_07.rst |   7 +
 drivers/event/dsw/dsw_evdev.c          |   8 +-
 drivers/event/dsw/dsw_evdev.h          |   7 +-
 drivers/event/dsw/dsw_event.c          | 405 ++++++++++++++-----------
 4 files changed, 254 insertions(+), 173 deletions(-)

diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst
index a69f24cf99..706cc71212 100644
--- a/doc/guides/rel_notes/release_24_07.rst
+++ b/doc/guides/rel_notes/release_24_07.rst
@@ -24,6 +24,13 @@ DPDK Release 24.07
 New Features
 ------------
 
+* **Updated the DSW event device.**
+
+  * Added support for ``RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE``,
+    allowing applications to take on new tasks without having completed
+    (released) the previous event batch. This in turn facilitates DSW
+    use alongside high-latency look-aside hardware accelerators.
+
 .. This section should contain new features added in this release.
    Sample format:
 
diff --git a/drivers/event/dsw/dsw_evdev.c b/drivers/event/dsw/dsw_evdev.c
index ab0420b549..0dea1091e3 100644
--- a/drivers/event/dsw/dsw_evdev.c
+++ b/drivers/event/dsw/dsw_evdev.c
@@ -23,15 +23,20 @@ dsw_port_setup(struct rte_eventdev *dev, uint8_t port_id,
 	struct rte_event_ring *in_ring;
 	struct rte_ring *ctl_in_ring;
 	char ring_name[RTE_RING_NAMESIZE];
+	bool implicit_release;
 
 	port = &dsw->ports[port_id];
 
+	implicit_release =
+	    !(conf->event_port_cfg & RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
+
 	*port = (struct dsw_port) {
 		.id = port_id,
 		.dsw = dsw,
 		.dequeue_depth = conf->dequeue_depth,
 		.enqueue_depth = conf->enqueue_depth,
-		.new_event_threshold = conf->new_event_threshold
+		.new_event_threshold = conf->new_event_threshold,
+		.implicit_release = implicit_release
 	};
 
 	snprintf(ring_name, sizeof(ring_name), "dsw%d_p%u", dev->data->dev_id,
@@ -222,6 +227,7 @@ dsw_info_get(struct rte_eventdev *dev __rte_unused,
 		RTE_EVENT_DEV_CAP_ATOMIC |
 		RTE_EVENT_DEV_CAP_PARALLEL |
 		RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED|
+		RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE|
 		RTE_EVENT_DEV_CAP_NONSEQ_MODE|
 		RTE_EVENT_DEV_CAP_MULTIPLE_QUEUE_PORT|
 		RTE_EVENT_DEV_CAP_CARRY_FLOW_ID
diff --git a/drivers/event/dsw/dsw_evdev.h b/drivers/event/dsw/dsw_evdev.h
index 2018306265..c9bf4f8b6b 100644
--- a/drivers/event/dsw/dsw_evdev.h
+++ b/drivers/event/dsw/dsw_evdev.h
@@ -127,6 +127,7 @@ struct dsw_queue_flow {
 
 enum dsw_migration_state {
 	DSW_MIGRATION_STATE_IDLE,
+	DSW_MIGRATION_STATE_FINISH_PENDING,
 	DSW_MIGRATION_STATE_PAUSING,
 	DSW_MIGRATION_STATE_UNPAUSING
 };
@@ -148,6 +149,8 @@ struct __rte_cache_aligned dsw_port {
 
 	int32_t new_event_threshold;
 
+	bool implicit_release;
+
 	uint16_t pending_releases;
 
 	uint16_t next_parallel_flow_id;
@@ -255,8 +258,8 @@ struct dsw_evdev {
 	alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int32_t) credits_on_loan;
 };
 
-#define DSW_CTL_PAUS_REQ (0)
-#define DSW_CTL_UNPAUS_REQ (1)
+#define DSW_CTL_PAUSE_REQ (0)
+#define DSW_CTL_UNPAUSE_REQ (1)
 #define DSW_CTL_CFM (2)
 
 struct __rte_aligned(4) dsw_ctl_msg {
diff --git a/drivers/event/dsw/dsw_event.c b/drivers/event/dsw/dsw_event.c
index ca2b8e1032..33f741990f 100644
--- a/drivers/event/dsw/dsw_event.c
+++ b/drivers/event/dsw/dsw_event.c
@@ -275,7 +275,7 @@ dsw_port_add_paused_flows(struct dsw_port *port, struct dsw_queue_flow *qfs,
 
 static void
 dsw_port_remove_paused_flow(struct dsw_port *port,
-			    struct dsw_queue_flow *target_qf)
+			    const struct dsw_queue_flow *target_qf)
 {
 	uint16_t i;
 
@@ -302,6 +302,7 @@ dsw_port_remove_paused_flow(struct dsw_port *port,
 	DSW_LOG_DP_PORT(ERR, port->id,
 			"Failed to unpause queue_id %d flow_hash %d.\n",
 			target_qf->queue_id, target_qf->flow_hash);
+	RTE_VERIFY(0);
 }
 
 static void
@@ -602,7 +603,7 @@ dsw_port_transmit_buffered(struct dsw_evdev *dsw, struct dsw_port *source_port,
 		return;
 
 	/* The rings are dimensioned to fit all in-flight events (even
-	 * on a single ring), so looping will work.
+	 * on a single ring).
 	 */
 	rte_event_ring_enqueue_bulk(dest_port->in_ring, buffer, *buffer_len,
 				    NULL);
@@ -791,6 +792,7 @@ dsw_port_end_emigration(struct dsw_evdev *dsw, struct dsw_port *port,
 
 	if (port->emigration_targets_len == 0) {
 		port->migration_state = DSW_MIGRATION_STATE_IDLE;
+		port->emigration_targets_len = 0;
 		port->seen_events_len = 0;
 	}
 }
@@ -844,27 +846,35 @@ dsw_port_consider_emigration(struct dsw_evdev *dsw,
 
 	DSW_LOG_DP_PORT(DEBUG, source_port->id, "Considering emigration.\n");
 
+	/* For simplicity, postpone migration if there are still
+	 * events to consume in the in_buffer (from the last
+	 * emigration).
+	 */
+	if (source_port->in_buffer_len > 0) {
+		DSW_LOG_DP_PORT(DEBUG, source_port->id, "There are still "
+				"events in the input buffer.\n");
+		return;
+	}
+
+	if (source_port->migration_state != DSW_MIGRATION_STATE_IDLE) {
+		DSW_LOG_DP_PORT(DEBUG, source_port->id,
+				"Emigration already in progress.\n");
+		return;
+	}
+
 	if (seen_events_len < DSW_MAX_EVENTS_RECORDED) {
 		DSW_LOG_DP_PORT(DEBUG, source_port->id, "Not enough events "
 				"are recorded to allow for a migration.\n");
 		return;
 	}
 
-	/* A flow migration cannot be initiated if there are paused
-	 * events, since some/all of those events may be have been
-	 * produced as a result of processing the flow(s) selected for
-	 * migration. Moving such a flow would potentially introduced
-	 * reordering, since processing the migrated flow on the
-	 * receiving flow may commence before the to-be-enqueued-to
-
-	 * flows are unpaused, leading to paused events on the second
-	 * port as well, destined for the same paused flow(s). When
-	 * those flows are unpaused, the resulting events are
-	 * delivered the owning port in an undefined order.
+	/* Postpone considering migration in case paused events exist, since
+	 * such events may prevent the migration procedure from completing,
+	 * leading to wasted CPU cycles (e.g., sorting queue flows).
 	 */
 	if (source_port->paused_events_len > 0) {
-		DSW_LOG_DP_PORT(DEBUG, source_port->id, "There are "
-				"events in the paus buffer.\n");
+		DSW_LOG_DP_PORT(DEBUG, source_port->id, "Paused events on "
+				"port. Postponing any migrations.\n");
 		return;
 	}
 
@@ -874,23 +884,7 @@ dsw_port_consider_emigration(struct dsw_evdev *dsw,
 	 */
 	source_port->next_emigration = now +
 		source_port->migration_interval / 2 +
-		rte_rand() % source_port->migration_interval;
-
-	if (source_port->migration_state != DSW_MIGRATION_STATE_IDLE) {
-		DSW_LOG_DP_PORT(DEBUG, source_port->id,
-				"Emigration already in progress.\n");
-		return;
-	}
-
-	/* For simplicity, avoid migration in the unlikely case there
-	 * is still events to consume in the in_buffer (from the last
-	 * emigration).
-	 */
-	if (source_port->in_buffer_len > 0) {
-		DSW_LOG_DP_PORT(DEBUG, source_port->id, "There are still "
-				"events in the input buffer.\n");
-		return;
-	}
+		rte_rand_max(source_port->migration_interval);
 
 	source_port_load =
 		rte_atomic_load_explicit(&source_port->load,
@@ -936,7 +930,7 @@ dsw_port_consider_emigration(struct dsw_evdev *dsw,
 	if (source_port->emigration_targets_len == 0)
 		return;
 
-	source_port->migration_state = DSW_MIGRATION_STATE_PAUSING;
+	source_port->migration_state = DSW_MIGRATION_STATE_FINISH_PENDING;
 	source_port->emigration_start = rte_get_timer_cycles();
 
 	/* No need to go through the whole pause procedure for
@@ -944,24 +938,73 @@ dsw_port_consider_emigration(struct dsw_evdev *dsw,
 	 * be maintained.
 	 */
 	dsw_port_move_parallel_flows(dsw, source_port);
+}
 
-	/* All flows were on PARALLEL queues. */
-	if (source_port->migration_state == DSW_MIGRATION_STATE_IDLE)
-		return;
+static void
+dsw_port_abort_migration(struct dsw_port *source_port)
+{
+	RTE_ASSERT(source_port->in_buffer_start == 0);
+	RTE_ASSERT(source_port->in_buffer_len == 0);
 
-	/* There might be 'loopback' events already scheduled in the
-	 * output buffers.
+	/* Putting the stashed events in the in_buffer makes sure they
+	 * are processed before any events on the in_ring, to avoid
+	 * reordering.
 	 */
-	dsw_port_flush_out_buffers(dsw, source_port);
+	rte_memcpy(source_port->in_buffer, source_port->emigrating_events,
+		 source_port->emigrating_events_len * sizeof(struct rte_event));
+	source_port->in_buffer_len = source_port->emigrating_events_len;
+	source_port->emigrating_events_len = 0;
+
+	source_port->emigration_targets_len = 0;
+
+	source_port->migration_state = DSW_MIGRATION_STATE_IDLE;
+}
+
+static void
+dsw_port_continue_emigration(struct dsw_evdev *dsw,
+			     struct dsw_port *source_port)
+{
+	/* A flow migration cannot be completed if there are paused
+	 * events, since some/all of those events may have been
+	 * produced as a result of processing the flow(s) selected for
+	 * migration. Moving such a flow would potentially introduce
+	 * reordering, since processing the migrated flow on the
+	 * receiving port may commence before the to-be-enqueued-to
+	 * flows are unpaused, leading to paused events on the second
+	 * port as well, destined for the same paused flow(s). When
+	 * those flows are unpaused, the resulting events are
+	 * delivered to the owning port in an undefined order.
+	 *
+	 * Waiting for the events to be unpaused could lead to a
+	 * deadlock, where two ports are both waiting for the other to
+	 * unpause.
+	 */
+	if (source_port->paused_events_len > 0) {
+		DSW_LOG_DP_PORT(DEBUG, source_port->id, "There are events in "
+				"the pause buffer. Aborting migration.\n");
+		dsw_port_abort_migration(source_port);
+		return;
+	}
 
 	dsw_port_add_paused_flows(source_port,
 				  source_port->emigration_target_qfs,
 				  source_port->emigration_targets_len);
 
-	dsw_port_ctl_broadcast(dsw, source_port, DSW_CTL_PAUS_REQ,
+	dsw_port_ctl_broadcast(dsw, source_port, DSW_CTL_PAUSE_REQ,
 			       source_port->emigration_target_qfs,
 			       source_port->emigration_targets_len);
 	source_port->cfm_cnt = 0;
+
+	source_port->migration_state = DSW_MIGRATION_STATE_PAUSING;
+}
+
+static void
+dsw_port_try_finish_pending(struct dsw_evdev *dsw, struct dsw_port *source_port)
+{
+	if (unlikely(source_port->migration_state ==
+		     DSW_MIGRATION_STATE_FINISH_PENDING &&
+		     source_port->pending_releases == 0))
+		dsw_port_continue_emigration(dsw, source_port);
 }
 
 static void
@@ -982,8 +1025,6 @@ dsw_port_handle_unpause_flows(struct dsw_evdev *dsw, struct dsw_port *port,
 
 	dsw_port_remove_paused_flows(port, paused_qfs, qfs_len);
 
-	rte_smp_rmb();
-
 	dsw_port_ctl_enqueue(&dsw->ports[originating_port_id], &cfm);
 
 	for (i = 0; i < qfs_len; i++) {
@@ -1008,45 +1049,40 @@ dsw_port_buffer_in_buffer(struct dsw_port *port,
 }
 
 static void
-dsw_port_forward_emigrated_event(struct dsw_evdev *dsw,
-				 struct dsw_port *source_port,
-				 struct rte_event *event)
+dsw_port_stash_migrating_event(struct dsw_port *port,
+			       const struct rte_event *event)
+{
+	port->emigrating_events[port->emigrating_events_len] = *event;
+	port->emigrating_events_len++;
+}
+
+static void
+dsw_port_stash_any_migrating_events(struct dsw_port *port,
+				    struct rte_event *events,
+				    uint16_t *num)
 {
 	uint16_t i;
+	uint16_t offset = 0;
 
-	for (i = 0; i < source_port->emigration_targets_len; i++) {
-		struct dsw_queue_flow *qf =
-			&source_port->emigration_target_qfs[i];
-		uint8_t dest_port_id =
-			source_port->emigration_target_port_ids[i];
-		struct dsw_port *dest_port = &dsw->ports[dest_port_id];
+	for (i = 0; i < *num; i++) {
+		uint16_t flow_hash;
+		struct rte_event *in_event = &events[i];
 
-		if (event->queue_id == qf->queue_id &&
-		    dsw_flow_id_hash(event->flow_id) == qf->flow_hash) {
-			/* No need to care about bursting forwarded
-			 * events (to the destination port's in_ring),
-			 * since migration doesn't happen very often,
-			 * and also the majority of the dequeued
-			 * events will likely *not* be forwarded.
-			 */
-			while (rte_event_ring_enqueue_burst(dest_port->in_ring,
-							    event, 1,
-							    NULL) != 1)
-				rte_pause();
-			return;
+		flow_hash = dsw_flow_id_hash(in_event->flow_id);
+
+		if (unlikely(dsw_port_is_flow_migrating(port,
+							in_event->queue_id,
+							flow_hash))) {
+			dsw_port_stash_migrating_event(port, in_event);
+			offset++;
+		} else if (offset > 0) {
+			struct rte_event *out_event = &events[i - offset];
+			rte_memcpy(out_event, in_event,
+				   sizeof(struct rte_event));
 		}
 	}
 
-	/* Event did not belong to the emigrated flows */
-	dsw_port_buffer_in_buffer(source_port, event);
-}
-
-static void
-dsw_port_stash_migrating_event(struct dsw_port *port,
-			       const struct rte_event *event)
-{
-	port->emigrating_events[port->emigrating_events_len] = *event;
-	port->emigrating_events_len++;
+	*num -= offset;
 }
 
 #define DRAIN_DEQUEUE_BURST_SIZE (32)
@@ -1054,28 +1090,21 @@ dsw_port_stash_migrating_event(struct dsw_port *port,
 static void
 dsw_port_drain_in_ring(struct dsw_port *source_port)
 {
-	uint16_t num_events;
-	uint16_t dequeued;
-
-	/* Control ring message should been seen before the ring count
-	 * is read on the port's in_ring.
-	 */
-	rte_smp_rmb();
-
-	num_events = rte_event_ring_count(source_port->in_ring);
-
-	for (dequeued = 0; dequeued < num_events; ) {
-		uint16_t burst_size = RTE_MIN(DRAIN_DEQUEUE_BURST_SIZE,
-					      num_events - dequeued);
-		struct rte_event events[burst_size];
-		uint16_t len;
+	for (;;) {
+		struct rte_event events[DRAIN_DEQUEUE_BURST_SIZE];
+		uint16_t n;
 		uint16_t i;
+		uint16_t available;
 
-		len = rte_event_ring_dequeue_burst(source_port->in_ring,
-						   events, burst_size,
-						   NULL);
+		n = rte_event_ring_dequeue_burst(source_port->in_ring,
+						 events,
+						 DRAIN_DEQUEUE_BURST_SIZE,
+						 &available);
 
-		for (i = 0; i < len; i++) {
+		if (n == 0 && available == 0)
+			break;
+
+		for (i = 0; i < n; i++) {
 			struct rte_event *event = &events[i];
 			uint16_t flow_hash;
 
@@ -1089,9 +1118,41 @@ dsw_port_drain_in_ring(struct dsw_port *source_port)
 			else
 				dsw_port_buffer_in_buffer(source_port, event);
 		}
+	}
+}
 
-		dequeued += len;
+static void
+dsw_port_forward_emigrated_event(struct dsw_evdev *dsw,
+				 struct dsw_port *source_port,
+				 struct rte_event *event)
+{
+	uint16_t i;
+
+	for (i = 0; i < source_port->emigration_targets_len; i++) {
+		struct dsw_queue_flow *qf =
+			&source_port->emigration_target_qfs[i];
+		uint8_t dest_port_id =
+			source_port->emigration_target_port_ids[i];
+		struct dsw_port *dest_port = &dsw->ports[dest_port_id];
+
+		if (event->queue_id == qf->queue_id &&
+		    dsw_flow_id_hash(event->flow_id) == qf->flow_hash) {
+			/* No need to care about bursting forwarded
+			 * events (to the destination port's in_ring),
+			 * since migration doesn't happen very often,
+			 * and also the majority of the dequeued
+			 * events will likely *not* be forwarded.
+			 */
+			while (rte_event_ring_enqueue_burst(dest_port->in_ring,
+							    event, 1,
+							    NULL) != 1)
+				rte_pause();
+			return;
+		}
 	}
+
+	/* Event did not belong to the emigrated flows */
+	dsw_port_buffer_in_buffer(source_port, event);
 }
 
 static void
@@ -1114,6 +1175,9 @@ dsw_port_move_emigrating_flows(struct dsw_evdev *dsw,
 {
 	uint8_t i;
 
+	/* There may be events lingering in the output buffer from
+	 * before the pause took effect.
+	 */
 	dsw_port_flush_out_buffers(dsw, source_port);
 
 	for (i = 0; i < source_port->emigration_targets_len; i++) {
@@ -1137,12 +1201,21 @@ dsw_port_move_emigrating_flows(struct dsw_evdev *dsw,
 
 	dsw_port_flush_no_longer_paused_events(dsw, source_port);
 
+	/* Processing migrating flows during migration may have
+	 * produced events to paused flows, including the flows which
+	 * were being migrated. Flushing the output buffers before
+	 * unpausing the flows on other ports assures that such events
+	 * are seen *before* any events produced by processing the
+	 * migrating flows on the new port.
+	 */
+	dsw_port_flush_out_buffers(dsw, source_port);
+
 	/* Flow table update and migration destination port's enqueues
 	 * must be seen before the control message.
 	 */
 	rte_smp_wmb();
 
-	dsw_port_ctl_broadcast(dsw, source_port, DSW_CTL_UNPAUS_REQ,
+	dsw_port_ctl_broadcast(dsw, source_port, DSW_CTL_UNPAUSE_REQ,
 			       source_port->emigration_target_qfs,
 			       source_port->emigration_targets_len);
 	source_port->cfm_cnt = 0;
@@ -1154,7 +1227,7 @@ dsw_port_handle_confirm(struct dsw_evdev *dsw, struct dsw_port *port)
 {
 	port->cfm_cnt++;
 
-	if (port->cfm_cnt == (dsw->num_ports-1)) {
+	if (port->cfm_cnt == (dsw->num_ports - 1)) {
 		switch (port->migration_state) {
 		case DSW_MIGRATION_STATE_PAUSING:
 			dsw_port_move_emigrating_flows(dsw, port);
@@ -1164,7 +1237,7 @@ dsw_port_handle_confirm(struct dsw_evdev *dsw, struct dsw_port *port)
 						RTE_SCHED_TYPE_ATOMIC);
 			break;
 		default:
-			RTE_ASSERT(0);
+			RTE_VERIFY(0);
 			break;
 		}
 	}
@@ -1177,12 +1250,12 @@ dsw_port_ctl_process(struct dsw_evdev *dsw, struct dsw_port *port)
 
 	if (dsw_port_ctl_dequeue(port, &msg) == 0) {
 		switch (msg.type) {
-		case DSW_CTL_PAUS_REQ:
+		case DSW_CTL_PAUSE_REQ:
 			dsw_port_handle_pause_flows(dsw, port,
 						    msg.originating_port_id,
 						    msg.qfs, msg.qfs_len);
 			break;
-		case DSW_CTL_UNPAUS_REQ:
+		case DSW_CTL_UNPAUSE_REQ:
 			dsw_port_handle_unpause_flows(dsw, port,
 						      msg.originating_port_id,
 						      msg.qfs, msg.qfs_len);
@@ -1203,19 +1276,18 @@ dsw_port_note_op(struct dsw_port *port, uint16_t num_events)
 static void
 dsw_port_bg_process(struct dsw_evdev *dsw, struct dsw_port *port)
 {
-	/* For simplicity (in the migration logic), avoid all
-	 * background processing in case event processing is in
-	 * progress.
-	 */
-	if (port->pending_releases > 0)
-		return;
-
 	/* Polling the control ring is relatively inexpensive, and
 	 * polling it often helps bringing down migration latency, so
 	 * do this for every iteration.
 	 */
 	dsw_port_ctl_process(dsw, port);
 
+	/* Always check if a migration is waiting for pending releases
+	 * to arrive, to minimize the amount of time dequeuing events
+	 * from the port is disabled.
+	 */
+	dsw_port_try_finish_pending(dsw, port);
+
 	/* To avoid considering migration and flushing output buffers
 	 * on every dequeue/enqueue call, the scheduler only performs
 	 * such 'background' tasks every nth
@@ -1260,8 +1332,8 @@ static __rte_always_inline uint16_t
 dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 				const struct rte_event events[],
 				uint16_t events_len, bool op_types_known,
-				uint16_t num_new, uint16_t num_release,
-				uint16_t num_non_release)
+				uint16_t num_new, uint16_t num_forward,
+				uint16_t num_release)
 {
 	struct dsw_evdev *dsw = source_port->dsw;
 	bool enough_credits;
@@ -1295,14 +1367,14 @@ dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 	if (!op_types_known)
 		for (i = 0; i < events_len; i++) {
 			switch (events[i].op) {
-			case RTE_EVENT_OP_RELEASE:
-				num_release++;
-				break;
 			case RTE_EVENT_OP_NEW:
 				num_new++;
-				/* Falls through. */
-			default:
-				num_non_release++;
+				break;
+			case RTE_EVENT_OP_FORWARD:
+				num_forward++;
+				break;
+			case RTE_EVENT_OP_RELEASE:
+				num_release++;
 				break;
 			}
 		}
@@ -1318,15 +1390,20 @@ dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 		     source_port->new_event_threshold))
 		return 0;
 
-	enough_credits = dsw_port_acquire_credits(dsw, source_port,
-						  num_non_release);
+	enough_credits = dsw_port_acquire_credits(dsw, source_port, num_new);
 	if (unlikely(!enough_credits))
 		return 0;
 
-	source_port->pending_releases -= num_release;
+	dsw_port_return_credits(dsw, source_port, num_release);
+
+	/* This may seem harsh, but it's important for an application
+	 * to get early feedback for cases where it fails to stick to
+	 * the API contract.
+	 */
+	RTE_VERIFY(num_forward + num_release <= source_port->pending_releases);
+	source_port->pending_releases -= (num_forward + num_release);
 
-	dsw_port_enqueue_stats(source_port, num_new,
-			       num_non_release-num_new, num_release);
+	dsw_port_enqueue_stats(source_port, num_new, num_forward, num_release);
 
 	for (i = 0; i < events_len; i++) {
 		const struct rte_event *event = &events[i];
@@ -1338,9 +1415,9 @@ dsw_event_enqueue_burst_generic(struct dsw_port *source_port,
 	}
 
 	DSW_LOG_DP_PORT(DEBUG, source_port->id, "%d non-release events "
-			"accepted.\n", num_non_release);
+			"accepted.\n", num_new + num_forward);
 
-	return (num_non_release + num_release);
+	return (num_new + num_forward + num_release);
 }
 
 uint16_t
@@ -1367,7 +1444,7 @@ dsw_event_enqueue_new_burst(void *port, const struct rte_event events[],
 
 	return dsw_event_enqueue_burst_generic(source_port, events,
 					       events_len, true, events_len,
-					       0, events_len);
+					       0, 0);
 }
 
 uint16_t
@@ -1380,8 +1457,8 @@ dsw_event_enqueue_forward_burst(void *port, const struct rte_event events[],
 		events_len = source_port->enqueue_depth;
 
 	return dsw_event_enqueue_burst_generic(source_port, events,
-					       events_len, true, 0, 0,
-					       events_len);
+					       events_len, true, 0,
+					       events_len, 0);
 }
 
 uint16_t
@@ -1435,8 +1512,17 @@ static uint16_t
 dsw_port_dequeue_burst(struct dsw_port *port, struct rte_event *events,
 		       uint16_t num)
 {
-	if (unlikely(port->in_buffer_len > 0)) {
-		uint16_t dequeued = RTE_MIN(num, port->in_buffer_len);
+	enum dsw_migration_state state = port->migration_state;
+	uint16_t dequeued;
+
+	if (unlikely(state == DSW_MIGRATION_STATE_FINISH_PENDING))
+		/* Do not produce new items of work - only finish
+		 * outstanding (unreleased) events, to allow the
+		 * migration procedure to continue.
+		 */
+		dequeued = 0;
+	else if (unlikely(port->in_buffer_len > 0)) {
+		dequeued = RTE_MIN(num, port->in_buffer_len);
 
 		rte_memcpy(events, &port->in_buffer[port->in_buffer_start],
 			   dequeued * sizeof(struct rte_event));
@@ -1446,43 +1532,24 @@ dsw_port_dequeue_burst(struct dsw_port *port, struct rte_event *events,
 
 		if (port->in_buffer_len == 0)
 			port->in_buffer_start = 0;
-
-		return dequeued;
+	} else {
+		dequeued = rte_event_ring_dequeue_burst(port->in_ring,
+							events, num, NULL);
+
+		/* Stash incoming events belonging to migrating flows,
+		 * to avoid having to deal with forwarded events to
+		 * flows which are also in the process of being
+		 * migrated. A failure to do so leads to reordering,
+		 * since paused events on the source port may be
+		 * flushed after paused events on the migration
+		 * destination port.
+		 */
+		if (unlikely(state == DSW_MIGRATION_STATE_PAUSING))
+			dsw_port_stash_any_migrating_events(port, events,
+							    &dequeued);
 	}
 
-	return rte_event_ring_dequeue_burst(port->in_ring, events, num, NULL);
-}
-
-static void
-dsw_port_stash_migrating_events(struct dsw_port *port,
-				struct rte_event *events, uint16_t *num)
-{
-	uint16_t i;
-
-	/* The assumption here - performance-wise - is that events
-	 * belonging to migrating flows are relatively rare.
-	 */
-	for (i = 0; i < (*num); ) {
-		struct rte_event *event = &events[i];
-		uint16_t flow_hash;
-
-		flow_hash = dsw_flow_id_hash(event->flow_id);
-
-		if (unlikely(dsw_port_is_flow_migrating(port, event->queue_id,
-							flow_hash))) {
-			uint16_t left;
-
-			dsw_port_stash_migrating_event(port, event);
-
-			(*num)--;
-			left = *num - i;
-
-			if (left > 0)
-				memmove(event, event + 1,
-					left * sizeof(struct rte_event));
-		} else
-			i++;
-	}
+	return dequeued;
 }
 
 uint16_t
@@ -1493,7 +1560,12 @@ dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num,
 	struct dsw_evdev *dsw = source_port->dsw;
 	uint16_t dequeued;
 
-	source_port->pending_releases = 0;
+	if (source_port->implicit_release) {
+		dsw_port_return_credits(dsw, port,
+					source_port->pending_releases);
+
+		source_port->pending_releases = 0;
+	}
 
 	dsw_port_bg_process(dsw, source_port);
 
@@ -1502,12 +1574,7 @@ dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num,
 
 	dequeued = dsw_port_dequeue_burst(source_port, events, num);
 
-	if (unlikely(source_port->migration_state ==
-		     DSW_MIGRATION_STATE_PAUSING))
-		dsw_port_stash_migrating_events(source_port, events,
-						&dequeued);
-
-	source_port->pending_releases = dequeued;
+	source_port->pending_releases += dequeued;
 
 	dsw_port_load_record(source_port, dequeued);
 
@@ -1517,8 +1584,6 @@ dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num,
 		DSW_LOG_DP_PORT(DEBUG, source_port->id, "Dequeued %d events.\n",
 				dequeued);
 
-		dsw_port_return_credits(dsw, source_port, dequeued);
-
 		/* One potential optimization one might think of is to
 		 * add a migration state (prior to 'pausing'), and
 		 * only record seen events when the port is in this
-- 
2.34.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] event/dsw: support explicit release only mode
  2024-06-07 13:36     ` [PATCH v3] " Mattias Rönnblom
@ 2024-06-08  6:14       ` Jerin Jacob
  2024-06-08 16:02         ` Mattias Rönnblom
  0 siblings, 1 reply; 10+ messages in thread
From: Jerin Jacob @ 2024-06-08  6:14 UTC (permalink / raw)
  To: Mattias Rönnblom
  Cc: Jerin Jacob, dev, hofors, bruce.richardson, Peter Nilsson J,
	Svante Järvstråt, Heng Wang

On Fri, Jun 7, 2024 at 7:17 PM Mattias Rönnblom
<mattias.ronnblom@ericsson.com> wrote:
>
> Add the RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE capability to the
> DSW event device.
>
> This feature may be used by an EAL thread to pull more work from the
> work scheduler, without giving up the option to forward events
> originating from a previous dequeue batch. This in turn allows an EAL
> thread to be productive while waiting for a hardware accelerator to
> complete some operation.
>
> Prior to this change, DSW didn't make any distinction between
> RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_NEW type events, other than that
> new events would be backpressured earlier.
>
> After this change, DSW tracks the number of released events (i.e.,
> events of type RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_RELEASE) that have
> been enqueued.
>
> For efficiency reasons, DSW does not track the identity of individual
> events. This in turn implies that at a certain stage in the flow
> migration process, DSW must wait for all pending releases (on the
> migration source port only) to be received from the application, to
> assure that no event pertaining to any of the to-be-migrated flows is
> being processed.
>
> With this change, DSW starts making a distinction between forward and
> new type events for credit allocation purposes. Only RTE_EVENT_OP_NEW
> events need credits. All events marked as RTE_EVENT_OP_FORWARD must
> have a corresponding dequeued event from a previous dequeue batch.
>
> Flow migration for flows on RTE_SCHED_TYPE_PARALLEL queues remains
> unaffected by this change.
>
> A side-effect of the tweaked DSW migration logic is that the migration
> latency is reduced, regardless of whether implicit release is enabled or not.
>
> Another side-effect is that migrated flows are now not processed
> during any part of the migration procedure. An upside of this change
> is that it reduces the load on the overloaded port. A downside is
> that it introduces slightly more jitter for the migrated flows.
>
> This patch contains various minor refactorings, improved
> formatting, spelling fixes, and the removal of unnecessary memory
> barriers.

Move changelog after ---

>
> v3:
>  * Fix broken RTE_ASSERT()s. (Jerin Jacob)
>
> v2:
>   * Remove redundant memory barriers.
>   * Discontinue processing of migrated flows throughout the migration
>     procedure. This is a part of a fix to address a reordering issue
>     v1 of this patch introduced.
>   * Added entry in the release notes.
>   * Fix spelling issues in commit message.
>
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> ---
>  doc/guides/rel_notes/release_24_07.rst |   7 +
>  drivers/event/dsw/dsw_evdev.c          |   8 +-
>  drivers/event/dsw/dsw_evdev.h          |   7 +-
>  drivers/event/dsw/dsw_event.c          | 405 ++++++++++++++-----------
>  4 files changed, 254 insertions(+), 173 deletions(-)
>
> diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst
> index a69f24cf99..706cc71212 100644
> --- a/doc/guides/rel_notes/release_24_07.rst
> +++ b/doc/guides/rel_notes/release_24_07.rst
> @@ -24,6 +24,13 @@ DPDK Release 24.07
>  New Features
>  ------------
>
> +* **Updated the DSW event device.**
> +
> +  * Added support for ``RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE``,
> +    allowing applications to take on new tasks without having completed
> +    (released) the previous event batch. This in turn facilitates DSW
> +    use alongside high-latency look-aside hardware accelerators.
> +
>  .. This section should contain new features added in this release.
>     Sample format:

Update should be here (after the template).


Fixed above issues and applied to dpdk-next-eventdev/for-main. Thanks

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] event/dsw: support explicit release only mode
  2024-06-08  6:14       ` Jerin Jacob
@ 2024-06-08 16:02         ` Mattias Rönnblom
  0 siblings, 0 replies; 10+ messages in thread
From: Mattias Rönnblom @ 2024-06-08 16:02 UTC (permalink / raw)
  To: Jerin Jacob, Mattias Rönnblom
  Cc: Jerin Jacob, dev, bruce.richardson, Peter Nilsson J,
	Svante Järvstråt, Heng Wang

On 2024-06-08 08:14, Jerin Jacob wrote:
> On Fri, Jun 7, 2024 at 7:17 PM Mattias Rönnblom
> <mattias.ronnblom@ericsson.com> wrote:
>>
>> Add the RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE capability to the
>> DSW event device.
>>
>> This feature may be used by an EAL thread to pull more work from the
>> work scheduler, without giving up the option to forward events
>> originating from a previous dequeue batch. This in turn allows an EAL
>> thread to be productive while waiting for a hardware accelerator to
>> complete some operation.
>>
>> Prior to this change, DSW didn't make any distinction between
>> RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_NEW type events, other than that
>> new events would be backpressured earlier.
>>
>> After this change, DSW tracks the number of released events (i.e.,
>> events of type RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_RELEASE) that have
>> been enqueued.
>>
>> For efficiency reasons, DSW does not track the identity of individual
>> events. This in turn implies that at a certain stage in the flow
>> migration process, DSW must wait for all pending releases (on the
>> migration source port only) to be received from the application, to
>> assure that no event pertaining to any of the to-be-migrated flows is
>> being processed.
>>
>> With this change, DSW starts making a distinction between forward and
>> new type events for credit allocation purposes. Only RTE_EVENT_OP_NEW
>> events need credits. All events marked as RTE_EVENT_OP_FORWARD must
>> have a corresponding dequeued event from a previous dequeue batch.
>>
>> Flow migration for flows on RTE_SCHED_TYPE_PARALLEL queues remains
>> unaffected by this change.
>>
>> A side-effect of the tweaked DSW migration logic is that the migration
>> latency is reduced, regardless of whether implicit release is enabled or not.
>>
>> Another side-effect is that migrated flows are now not processed
>> during any part of the migration procedure. An upside of this change
>> is that it reduces the load on the overloaded port. A downside is
>> that it introduces slightly more jitter for the migrated flows.
>>
>> This patch contains various minor refactorings, improved
>> formatting, spelling fixes, and the removal of unnecessary memory
>> barriers.
> 
> Move changelog after ---
> 

I never remember where it should go, so I checked an old patch of mine, 
where I also got it wrong...

Anyway, thanks.

>>
>> v3:
>>   * Fix broken RTE_ASSERT()s. (Jerin Jacob)
>>
>> v2:
>>    * Remove redundant memory barriers.
>>    * Discontinue processing of migrated flows throughout the migration
>>      procedure. This is a part of a fix to address a reordering issue
>>      v1 of this patch introduced.
>>    * Added entry in the release notes.
>>    * Fix spelling issues in commit message.
>>
>> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>> ---
>>   doc/guides/rel_notes/release_24_07.rst |   7 +
>>   drivers/event/dsw/dsw_evdev.c          |   8 +-
>>   drivers/event/dsw/dsw_evdev.h          |   7 +-
>>   drivers/event/dsw/dsw_event.c          | 405 ++++++++++++++-----------
>>   4 files changed, 254 insertions(+), 173 deletions(-)
>>
>> diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst
>> index a69f24cf99..706cc71212 100644
>> --- a/doc/guides/rel_notes/release_24_07.rst
>> +++ b/doc/guides/rel_notes/release_24_07.rst
>> @@ -24,6 +24,13 @@ DPDK Release 24.07
>>   New Features
>>   ------------
>>
>> +* **Updated the DSW event device.**
>> +
>> +  * Added support for ``RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE``,
>> +    allowing applications to take on new tasks without having completed
>> +    (released) the previous event batch. This in turn facilitates DSW
>> +    use alongside high-latency look-aside hardware accelerators.
>> +
>>   .. This section should contain new features added in this release.
>>      Sample format:
> 
> Update should be here (after the template).
> 
> 
> Fixed above issues and applied to dpdk-next-eventdev/for-main. Thanks

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2024-06-08 16:03 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-09 18:33 [RFC] event/dsw: support explicit release only mode Mattias Rönnblom
2024-05-24 19:24 ` [PATCH] " Mattias Rönnblom
2024-05-27 15:35   ` Jerin Jacob
2024-05-27 16:08     ` Mattias Rönnblom
2024-05-27 17:17       ` Jerin Jacob
2024-06-05 13:38   ` [PATCH v2] " Mattias Rönnblom
2024-06-06  9:59     ` Jerin Jacob
2024-06-07 13:36     ` [PATCH v3] " Mattias Rönnblom
2024-06-08  6:14       ` Jerin Jacob
2024-06-08 16:02         ` Mattias Rönnblom
