From: "Mattias Rönnblom" <mattias.ronnblom@ericsson.com>
To: Jerin Jacob <jerin.jacob@caviumnetworks.com>
Cc: "Bruce Richardson" <bruce.richardson@intel.com>,
dev@dpdk.org, "Mattias Rönnblom" <mattias.ronnblom@ericsson.com>
Subject: [dpdk-dev] [PATCH v4 06/10] event/dsw: add DSW port load measurements
Date: Tue, 18 Sep 2018 14:45:10 +0200 [thread overview]
Message-ID: <20180918124514.10615-7-mattias.ronnblom@ericsson.com> (raw)
In-Reply-To: <20180918124514.10615-1-mattias.ronnblom@ericsson.com>
The DSW event device port now attempts to estimate its load (i.e. how
busy it is). This is required for load balancing to work (although
load balancing is not included in this patch), and may also be useful
for debugging purposes.
Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
drivers/event/dsw/dsw_evdev.c | 14 +++++
drivers/event/dsw/dsw_evdev.h | 40 +++++++++++++
drivers/event/dsw/dsw_event.c | 109 ++++++++++++++++++++++++++++++++++
3 files changed, 163 insertions(+)
diff --git a/drivers/event/dsw/dsw_evdev.c b/drivers/event/dsw/dsw_evdev.c
index 40a7435be..bcfa17bab 100644
--- a/drivers/event/dsw/dsw_evdev.c
+++ b/drivers/event/dsw/dsw_evdev.c
@@ -4,6 +4,7 @@
#include <stdbool.h>
+#include <rte_cycles.h>
#include <rte_eventdev_pmd.h>
#include <rte_eventdev_pmd_vdev.h>
#include <rte_random.h>
@@ -43,6 +44,11 @@ dsw_port_setup(struct rte_eventdev *dev, uint8_t port_id,
port->in_ring = in_ring;
+ rte_atomic16_init(&port->load);
+
+ port->load_update_interval =
+ (DSW_LOAD_UPDATE_INTERVAL * rte_get_timer_hz()) / US_PER_S;
+
dev->data->ports[port_id] = port;
return 0;
@@ -240,11 +246,19 @@ static int
dsw_start(struct rte_eventdev *dev)
{
struct dsw_evdev *dsw = dsw_pmd_priv(dev);
+ uint16_t i;
+ uint64_t now;
rte_atomic32_init(&dsw->credits_on_loan);
initial_flow_to_port_assignment(dsw);
+ now = rte_get_timer_cycles();
+ for (i = 0; i < dsw->num_ports; i++) {
+ dsw->ports[i].measurement_start = now;
+ dsw->ports[i].busy_start = now;
+ }
+
return 0;
}
diff --git a/drivers/event/dsw/dsw_evdev.h b/drivers/event/dsw/dsw_evdev.h
index f8e94e4a4..a5399dda5 100644
--- a/drivers/event/dsw/dsw_evdev.h
+++ b/drivers/event/dsw/dsw_evdev.h
@@ -36,6 +36,15 @@
*/
#define DSW_PARALLEL_FLOWS (1024)
+/* 'Background tasks' are polling the control rings for *
+ * migration-related messages, or flush the output buffer (so
+ * buffered events doesn't linger too long). Shouldn't be too low,
+ * since the system won't benefit from the 'batching' effects from
+ * the output buffer, and shouldn't be too high, since it will make
+ * buffered events linger too long in case the port goes idle.
+ */
+#define DSW_MAX_PORT_OPS_PER_BG_TASK (128)
+
/* Avoid making small 'loans' from the central in-flight event credit
* pool, to improve efficiency.
*/
@@ -50,6 +59,22 @@
*/
#define DSW_IN_RING_SIZE (DSW_MAX_EVENTS)
+#define DSW_MAX_LOAD (INT16_MAX)
+#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
+#define DSW_LOAD_TO_PERCENT(x) ((100*x)/DSW_MAX_LOAD)
+
+/* The thought behind keeping the load update interval shorter than
+ * the migration interval is that the load from newly migrated flows
+ * should 'show up' on the load measurement before new migrations are
+ * considered. This is to avoid having too many flows, from too many
+ * source ports, to be migrated too quickly to a lightly loaded port -
+ * in particular since this might cause the system to oscillate.
+ */
+#define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4)
+#define DSW_OLD_LOAD_WEIGHT (1)
+
+#define DSW_MIGRATION_INTERVAL (1000)
+
struct dsw_port {
uint16_t id;
@@ -71,10 +96,25 @@ struct dsw_port {
uint16_t next_parallel_flow_id;
+ uint16_t ops_since_bg_task;
+
+ uint64_t last_bg;
+
+ /* For port load measurement. */
+ uint64_t next_load_update;
+ uint64_t load_update_interval;
+ uint64_t measurement_start;
+ uint64_t busy_start;
+ uint64_t busy_cycles;
+ uint64_t total_busy_cycles;
+
uint16_t out_buffer_len[DSW_MAX_PORTS];
struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER];
struct rte_event_ring *in_ring __rte_cache_aligned;
+
+ /* Estimate of current port load. */
+ rte_atomic16_t load __rte_cache_aligned;
} __rte_cache_aligned;
struct dsw_queue {
diff --git a/drivers/event/dsw/dsw_event.c b/drivers/event/dsw/dsw_event.c
index 4a3af8ecd..f326147c9 100644
--- a/drivers/event/dsw/dsw_event.c
+++ b/drivers/event/dsw/dsw_event.c
@@ -7,6 +7,7 @@
#include <stdbool.h>
#include <rte_atomic.h>
+#include <rte_cycles.h>
#include <rte_random.h>
static bool
@@ -75,6 +76,70 @@ dsw_port_return_credits(struct dsw_evdev *dsw, struct dsw_port *port,
}
}
+static void
+dsw_port_load_record(struct dsw_port *port, unsigned int dequeued)
+{
+ if (dequeued > 0 && port->busy_start == 0)
+ /* work period begins */
+ port->busy_start = rte_get_timer_cycles();
+ else if (dequeued == 0 && port->busy_start > 0) {
+ /* work period ends */
+ uint64_t work_period =
+ rte_get_timer_cycles() - port->busy_start;
+ port->busy_cycles += work_period;
+ port->busy_start = 0;
+ }
+}
+
+static int16_t
+dsw_port_load_close_period(struct dsw_port *port, uint64_t now)
+{
+ uint64_t passed = now - port->measurement_start;
+ uint64_t busy_cycles = port->busy_cycles;
+
+ if (port->busy_start > 0) {
+ busy_cycles += (now - port->busy_start);
+ port->busy_start = now;
+ }
+
+ int16_t load = (DSW_MAX_LOAD * busy_cycles) / passed;
+
+ port->measurement_start = now;
+ port->busy_cycles = 0;
+
+ port->total_busy_cycles += busy_cycles;
+
+ return load;
+}
+
+static void
+dsw_port_load_update(struct dsw_port *port, uint64_t now)
+{
+ int16_t old_load;
+ int16_t period_load;
+ int16_t new_load;
+
+ old_load = rte_atomic16_read(&port->load);
+
+ period_load = dsw_port_load_close_period(port, now);
+
+ new_load = (period_load + old_load*DSW_OLD_LOAD_WEIGHT) /
+ (DSW_OLD_LOAD_WEIGHT+1);
+
+ rte_atomic16_set(&port->load, new_load);
+}
+
+static void
+dsw_port_consider_load_update(struct dsw_port *port, uint64_t now)
+{
+ if (now < port->next_load_update)
+ return;
+
+ port->next_load_update = now + port->load_update_interval;
+
+ dsw_port_load_update(port, now);
+}
+
static uint8_t
dsw_schedule(struct dsw_evdev *dsw, uint8_t queue_id, uint16_t flow_hash)
{
@@ -196,6 +261,39 @@ dsw_port_buffer_event(struct dsw_evdev *dsw, struct dsw_port *source_port,
dsw_port_buffer_non_paused(dsw, source_port, dest_port_id, event);
}
+static void
+dsw_port_note_op(struct dsw_port *port, uint16_t num_events)
+{
+ /* To pull the control ring reasonbly often on busy ports,
+ * each dequeued/enqueued event is considered an 'op' too.
+ */
+ port->ops_since_bg_task += (num_events+1);
+}
+
+static void
+dsw_port_flush_out_buffers(struct dsw_evdev *dsw, struct dsw_port *source_port);
+
+static void
+dsw_port_bg_process(struct dsw_evdev *dsw, struct dsw_port *port)
+{
+ if (unlikely(port->ops_since_bg_task >= DSW_MAX_PORT_OPS_PER_BG_TASK)) {
+ uint64_t now;
+
+ now = rte_get_timer_cycles();
+
+ port->last_bg = now;
+
+ /* Logic to avoid having events linger in the output
+ * buffer too long.
+ */
+ dsw_port_flush_out_buffers(dsw, port);
+
+ dsw_port_consider_load_update(port, now);
+
+ port->ops_since_bg_task = 0;
+ }
+}
+
static void
dsw_port_flush_out_buffers(struct dsw_evdev *dsw, struct dsw_port *source_port)
{
@@ -225,6 +323,8 @@ dsw_event_enqueue_burst_generic(void *port, const struct rte_event events[],
DSW_LOG_DP_PORT(DEBUG, source_port->id, "Attempting to enqueue %d "
"events to port %d.\n", events_len, source_port->id);
+ dsw_port_bg_process(dsw, source_port);
+
/* XXX: For performance (=ring efficiency) reasons, the
* scheduler relies on internal non-ring buffers instead of
* immediately sending the event to the destination ring. For
@@ -238,6 +338,7 @@ dsw_event_enqueue_burst_generic(void *port, const struct rte_event events[],
* considered.
*/
if (unlikely(events_len == 0)) {
+ dsw_port_note_op(source_port, DSW_MAX_PORT_OPS_PER_BG_TASK);
dsw_port_flush_out_buffers(dsw, source_port);
return 0;
}
@@ -245,6 +346,8 @@ dsw_event_enqueue_burst_generic(void *port, const struct rte_event events[],
if (unlikely(events_len > source_port->enqueue_depth))
events_len = source_port->enqueue_depth;
+ dsw_port_note_op(source_port, events_len);
+
if (!op_types_known)
for (i = 0; i < events_len; i++) {
switch (events[i].op) {
@@ -337,6 +440,8 @@ dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num,
source_port->pending_releases = 0;
+ dsw_port_bg_process(dsw, source_port);
+
if (unlikely(num > source_port->dequeue_depth))
num = source_port->dequeue_depth;
@@ -344,6 +449,10 @@ dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num,
source_port->pending_releases = dequeued;
+ dsw_port_load_record(source_port, dequeued);
+
+ dsw_port_note_op(source_port, dequeued);
+
if (dequeued > 0) {
DSW_LOG_DP_PORT(DEBUG, source_port->id, "Dequeued %d events.\n",
dequeued);
--
2.17.1
next prev parent reply other threads:[~2018-09-18 12:45 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-09-18 12:45 [dpdk-dev] [PATCH v4 00/10] A Distributed Software Event Device Mattias Rönnblom
2018-09-18 12:45 ` [dpdk-dev] [PATCH v4 01/10] event/dsw: add DSW device registration and build system Mattias Rönnblom
2018-10-02 13:43 ` Ferruh Yigit
2018-10-04 11:21 ` [dpdk-dev] [PATCH v2] event/dsw: fix icc build Mattias Rönnblom
2018-10-04 14:17 ` Jerin Jacob
2018-09-18 12:45 ` [dpdk-dev] [PATCH v4 02/10] event/dsw: add DSW device and queue configuration Mattias Rönnblom
2018-09-18 12:45 ` [dpdk-dev] [PATCH v4 03/10] event/dsw: add DSW port configuration Mattias Rönnblom
2018-09-18 12:45 ` [dpdk-dev] [PATCH v4 04/10] event/dsw: add support in DSW for linking/unlinking ports Mattias Rönnblom
2018-09-18 12:45 ` [dpdk-dev] [PATCH v4 05/10] event/dsw: add DSW event scheduling and device start/stop Mattias Rönnblom
2018-09-18 12:45 ` Mattias Rönnblom [this message]
2018-09-18 12:45 ` [dpdk-dev] [PATCH v4 07/10] event/dsw: add load balancing to the DSW event device Mattias Rönnblom
2018-09-18 12:45 ` [dpdk-dev] [PATCH v4 08/10] event/dsw: let DSW event device sort events on dequeue Mattias Rönnblom
2018-09-18 12:45 ` [dpdk-dev] [PATCH v4 09/10] event/dsw: implement eventdev 'xstats' counters in DSW Mattias Rönnblom
2018-09-18 12:45 ` [dpdk-dev] [PATCH v4 10/10] event/dsw: include DSW event device documentation Mattias Rönnblom
2018-09-19 11:53 ` [dpdk-dev] [PATCH v4 00/10] A Distributed Software Event Device Jerin Jacob
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180918124514.10615-7-mattias.ronnblom@ericsson.com \
--to=mattias.ronnblom@ericsson.com \
--cc=bruce.richardson@intel.com \
--cc=dev@dpdk.org \
--cc=jerin.jacob@caviumnetworks.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).