DPDK patches and discussions
 help / color / mirror / Atom feed
From: "Mattias Rönnblom" <mattias.ronnblom@ericsson.com>
To: jerin.jacob@caviumnetworks.com
Cc: bruce.richardson@intel.com, dev@dpdk.org,
	"Mattias Rönnblom" <mattias.ronnblom@ericsson.com>
Subject: [dpdk-dev] [PATCH 06/10] eventdev: add DSW port load measurements
Date: Thu, 30 Aug 2018 16:27:15 +0200	[thread overview]
Message-ID: <20180830142719.28569-7-mattias.ronnblom@ericsson.com> (raw)
In-Reply-To: <20180830142719.28569-1-mattias.ronnblom@ericsson.com>

The DSW event device port now attempts to estimate its load (i.e. how
busy it is). This is required for load balancing to work (although
load balancing is not included in this patch), and may also be useful
for debugging purposes.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 drivers/event/dsw/dsw_evdev.c |  14 +++++
 drivers/event/dsw/dsw_evdev.h |  40 +++++++++++++
 drivers/event/dsw/dsw_event.c | 109 ++++++++++++++++++++++++++++++++++
 3 files changed, 163 insertions(+)

diff --git a/drivers/event/dsw/dsw_evdev.c b/drivers/event/dsw/dsw_evdev.c
index 40a7435be..bcfa17bab 100644
--- a/drivers/event/dsw/dsw_evdev.c
+++ b/drivers/event/dsw/dsw_evdev.c
@@ -4,6 +4,7 @@
 
 #include <stdbool.h>
 
+#include <rte_cycles.h>
 #include <rte_eventdev_pmd.h>
 #include <rte_eventdev_pmd_vdev.h>
 #include <rte_random.h>
@@ -43,6 +44,11 @@ dsw_port_setup(struct rte_eventdev *dev, uint8_t port_id,
 
 	port->in_ring = in_ring;
 
+	rte_atomic16_init(&port->load);
+
+	port->load_update_interval =
+		(DSW_LOAD_UPDATE_INTERVAL * rte_get_timer_hz()) / US_PER_S;
+
 	dev->data->ports[port_id] = port;
 
 	return 0;
@@ -240,11 +246,19 @@ static int
 dsw_start(struct rte_eventdev *dev)
 {
 	struct dsw_evdev *dsw = dsw_pmd_priv(dev);
+	uint16_t i;
+	uint64_t now;
 
 	rte_atomic32_init(&dsw->credits_on_loan);
 
 	initial_flow_to_port_assignment(dsw);
 
+	now = rte_get_timer_cycles();
+	for (i = 0; i < dsw->num_ports; i++) {
+		dsw->ports[i].measurement_start = now;
+		dsw->ports[i].busy_start = now;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/event/dsw/dsw_evdev.h b/drivers/event/dsw/dsw_evdev.h
index 73a3d00e7..37f7c80cc 100644
--- a/drivers/event/dsw/dsw_evdev.h
+++ b/drivers/event/dsw/dsw_evdev.h
@@ -36,6 +36,15 @@
  */
 #define DSW_PARALLEL_FLOWS (1024)
 
+/* 'Background tasks' are polling the control rings for
+ *  migration-related messages, or flushing the output buffer (so
+ *  buffered events don't linger too long). This limit shouldn't be
+ *  too low, since the system then won't benefit from the 'batching'
+ *  effects of the output buffer, and shouldn't be too high, since it
+ *  will make buffered events linger too long if the port goes idle.
+ */
+#define DSW_MAX_PORT_OPS_PER_BG_TASK (128)
+
 /* Avoid making small 'loans' from the central in-flight event credit
  * pool, to improve efficiency.
  */
@@ -50,6 +59,22 @@
  */
 #define DSW_IN_RING_SIZE (DSW_MAX_EVENTS)
 
+#define DSW_MAX_LOAD (INT16_MAX) /* Load value of a 100% busy port. */
+#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
+#define DSW_LOAD_TO_PERCENT(x) ((100*(x))/DSW_MAX_LOAD)
+
+/* The thought behind keeping the load update interval shorter than
+ * the migration interval is that the load from newly migrated flows
+ * should 'show up' on the load measurement before new migrations are
+ * considered. This is to avoid having too many flows, from too many
+ * source ports, to be migrated too quickly to a lightly loaded port -
+ * in particular since this might cause the system to oscillate.
+ */
+#define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4)
+#define DSW_OLD_LOAD_WEIGHT (1)
+
+#define DSW_MIGRATION_INTERVAL (1000)
+
 struct dsw_port {
 	uint16_t id;
 
@@ -71,10 +96,25 @@ struct dsw_port {
 
 	uint16_t next_parallel_flow_id;
 
+	uint16_t ops_since_bg_task;
+
+	uint64_t last_bg;
+
+	/* For port load measurement. */
+	uint64_t next_load_update;
+	uint64_t load_update_interval;
+	uint64_t measurement_start;
+	uint64_t busy_start;
+	uint64_t busy_cycles;
+	uint64_t total_busy_cycles;
+
 	uint16_t out_buffer_len[DSW_MAX_PORTS];
 	struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER];
 
 	struct rte_event_ring *in_ring __rte_cache_aligned;
+
+	/* Estimate of current port load. */
+	rte_atomic16_t load __rte_cache_aligned;
 } __rte_cache_aligned;
 
 struct dsw_queue {
diff --git a/drivers/event/dsw/dsw_event.c b/drivers/event/dsw/dsw_event.c
index d38e1fee6..c9b500bf8 100644
--- a/drivers/event/dsw/dsw_event.c
+++ b/drivers/event/dsw/dsw_event.c
@@ -7,6 +7,7 @@
 #include <stdbool.h>
 
 #include <rte_atomic.h>
+#include <rte_cycles.h>
 #include <rte_random.h>
 
 static bool
@@ -75,6 +76,70 @@ dsw_port_return_credits(struct dsw_evdev *dsw, struct dsw_port *port,
 	}
 }
 
+static void
+dsw_port_load_record(struct dsw_port *port, unsigned int dequeued)
+{
+	if (dequeued > 0 && port->busy_start == 0)
+		/* work period begins */
+		port->busy_start = rte_get_timer_cycles();
+	else if (dequeued == 0 && port->busy_start > 0) {
+		/* work period ends */
+		uint64_t work_period =
+			rte_get_timer_cycles() - port->busy_start;
+		port->busy_cycles += work_period;
+		port->busy_start = 0;
+	}
+}
+
+/* Ends the measurement period at 'now' and returns that period's load. */
+static int16_t
+dsw_port_load_close_period(struct dsw_port *port, uint64_t now)
+{
+	uint64_t passed = now - port->measurement_start;
+	uint64_t busy_cycles = port->busy_cycles;
+
+	if (port->busy_start > 0) {
+		/* Still busy: count the ongoing work period up to 'now'. */
+		busy_cycles += (now - port->busy_start);
+		port->busy_start = now;
+	}
+	/* Guard the division: a zero-length period yields zero load. */
+	int16_t load = passed > 0 ? (DSW_MAX_LOAD * busy_cycles) / passed : 0;
+	port->measurement_start = now;
+	port->busy_cycles = 0;
+	port->total_busy_cycles += busy_cycles;
+
+	return load;
+}
+
+static void
+dsw_port_load_update(struct dsw_port *port, uint64_t now)
+{
+	int16_t old_load;
+	int16_t period_load;
+	int16_t new_load;
+
+	old_load = rte_atomic16_read(&port->load);
+
+	period_load = dsw_port_load_close_period(port, now);
+
+	new_load = (period_load + old_load*DSW_OLD_LOAD_WEIGHT) /
+		(DSW_OLD_LOAD_WEIGHT+1);
+
+	rte_atomic16_set(&port->load, new_load);
+}
+
+static void
+dsw_port_consider_load_update(struct dsw_port *port, uint64_t now)
+{
+	if (now < port->next_load_update)
+		return;
+
+	port->next_load_update = now + port->load_update_interval;
+
+	dsw_port_load_update(port, now);
+}
+
 static uint8_t
 dsw_schedule(struct dsw_evdev *dsw, uint8_t queue_id, uint16_t flow_hash)
 {
@@ -196,6 +261,39 @@ dsw_port_buffer_event(struct dsw_evdev *dsw, struct dsw_port *source_port,
 	dsw_port_buffer_non_paused(dsw, source_port, dest_port_id, event);
 }
 
+static void
+dsw_port_note_op(struct dsw_port *port, uint16_t num_events)
+{
+	/* To poll the control ring reasonably often on busy ports,
+	 * each dequeued/enqueued event is considered an 'op' too.
+	 */
+	port->ops_since_bg_task += (num_events+1);
+}
+
+static void
+dsw_port_flush_out_buffers(struct dsw_evdev *dsw, struct dsw_port *source_port);
+
+static void
+dsw_port_bg_process(struct dsw_evdev *dsw, struct dsw_port *port)
+{
+	if (unlikely(port->ops_since_bg_task >= DSW_MAX_PORT_OPS_PER_BG_TASK)) {
+		uint64_t now;
+
+		now = rte_get_timer_cycles();
+
+		port->last_bg = now;
+
+		/* Logic to avoid having events linger in the output
+		 * buffer too long.
+		 */
+		dsw_port_flush_out_buffers(dsw, port);
+
+		dsw_port_consider_load_update(port, now);
+
+		port->ops_since_bg_task = 0;
+	}
+}
+
 static void
 dsw_port_flush_out_buffers(struct dsw_evdev *dsw, struct dsw_port *source_port)
 {
@@ -225,6 +323,8 @@ dsw_event_enqueue_burst_generic(void *port, const struct rte_event events[],
 	DSW_LOG_DP_PORT(DEBUG, source_port->id, "Attempting to enqueue %d "
 			"events to port %d.\n", events_len, source_port->id);
 
+	dsw_port_bg_process(dsw, source_port);
+
 	/* XXX: For performance (=ring efficiency) reasons, the
 	 * scheduler relies on internal non-ring buffers instead of
 	 * immediately sending the event to the destination ring. For
@@ -238,6 +338,7 @@ dsw_event_enqueue_burst_generic(void *port, const struct rte_event events[],
 	 * considered.
 	 */
 	if (unlikely(events_len == 0)) {
+		dsw_port_note_op(source_port, DSW_MAX_PORT_OPS_PER_BG_TASK);
 		dsw_port_flush_out_buffers(dsw, source_port);
 		return 0;
 	}
@@ -245,6 +346,8 @@ dsw_event_enqueue_burst_generic(void *port, const struct rte_event events[],
 	if (unlikely(events_len > source_port->enqueue_depth))
 		events_len = source_port->enqueue_depth;
 
+	dsw_port_note_op(source_port, events_len);
+
 	if (!op_types_known)
 		for (i = 0; i < events_len; i++) {
 			switch (events[i].op) {
@@ -337,6 +440,8 @@ dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num,
 
 	source_port->pending_releases = 0;
 
+	dsw_port_bg_process(dsw, source_port);
+
 	if (unlikely(num > source_port->dequeue_depth))
 		num = source_port->dequeue_depth;
 
@@ -344,6 +449,10 @@ dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num,
 
 	source_port->pending_releases = dequeued;
 
+	dsw_port_load_record(source_port, dequeued);
+
+	dsw_port_note_op(source_port, dequeued);
+
 	if (dequeued > 0) {
 		DSW_LOG_DP_PORT(DEBUG, source_port->id, "Dequeued %d events.\n",
 				dequeued);
-- 
2.17.1

  parent reply	other threads:[~2018-08-30 14:27 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-30 14:27 [dpdk-dev] [PATCH 00/10] Add the Distributed Software Event Device Mattias Rönnblom
2018-08-30 14:27 ` [dpdk-dev] [PATCH 01/10] eventdev: add DSW device registration and build system Mattias Rönnblom
2018-08-30 14:27 ` [dpdk-dev] [PATCH 02/10] eventdev: add DSW device and queue configuration Mattias Rönnblom
2018-08-30 14:27 ` [dpdk-dev] [PATCH 03/10] eventdev: add DSW port configuration Mattias Rönnblom
2018-08-30 14:27 ` [dpdk-dev] [PATCH 04/10] eventdev: add support in DSW for linking/unlinking ports Mattias Rönnblom
2018-08-30 14:27 ` [dpdk-dev] [PATCH 05/10] eventdev: add DSW event scheduling and device start/stop Mattias Rönnblom
2018-08-30 14:27 ` Mattias Rönnblom [this message]
2018-08-30 14:27 ` [dpdk-dev] [PATCH 07/10] eventdev: add load balancing to the DSW event device Mattias Rönnblom
2018-08-30 14:27 ` [dpdk-dev] [PATCH 08/10] eventdev: let DSW event device sort events on dequeue Mattias Rönnblom
2018-08-30 14:27 ` [dpdk-dev] [PATCH 09/10] eventdev: implement eventdev 'xstats' counters in DSW Mattias Rönnblom
2018-08-30 14:27 ` [dpdk-dev] [PATCH 10/10] eventdev: include DSW event device documentation Mattias Rönnblom
2018-09-10 12:59   ` Mattias Rönnblom
2018-09-10 13:20     ` Jerin Jacob

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180830142719.28569-7-mattias.ronnblom@ericsson.com \
    --to=mattias.ronnblom@ericsson.com \
    --cc=bruce.richardson@intel.com \
    --cc=dev@dpdk.org \
    --cc=jerin.jacob@caviumnetworks.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).