DPDK patches and discussions
 help / color / Atom feed
From: Matan Azrad <matan@mellanox.com>
To: Maxime Coquelin <maxime.coquelin@redhat.com>
Cc: dev@dpdk.org, Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Subject: [dpdk-dev] [PATCH v2 3/3] vdpa/mlx5: control completion queue event mode
Date: Thu, 25 Jun 2020 13:30:38 +0000
Message-ID: <1593091838-51869-4-git-send-email-matan@mellanox.com> (raw)
In-Reply-To: <1593091838-51869-1-git-send-email-matan@mellanox.com>

The CQ polling is necessary in order to manage guest notifications when
the guest doesn't work with poll mode (callfd != -1).

The CQ polling scheduling method can affect the host CPU utilization and
the traffic bandwidth.

Define 3 modes to control the CQ polling scheduling:

1. A timer thread which automatically adjusts its delays to the incoming
   traffic rate.
2. A timer thread with fixed delay time.
3. Interrupts: Each CQE burst arms the CQ in order to get an interrupt
   event in the next traffic burst.

When traffic stops, mode 3 is selected automatically.

Interrupt management takes a lot of CPU cycles but forwards traffic
events to the guest very quickly.

A timer thread saves the interrupt overhead but may add delay to the
guest notification.

Add device arguments to control the mode.

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 doc/guides/vdpadevs/mlx5.rst        | 32 ++++++++++++++++++++
 drivers/vdpa/mlx5/mlx5_vdpa.c       | 58 +++++++++++++++++++++++++++++++++++++
 drivers/vdpa/mlx5/mlx5_vdpa.h       | 13 +++++++++
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 56 +++++++++++++++++++++++++----------
 4 files changed, 144 insertions(+), 15 deletions(-)

diff --git a/doc/guides/vdpadevs/mlx5.rst b/doc/guides/vdpadevs/mlx5.rst
index dd377af..ab62542 100644
--- a/doc/guides/vdpadevs/mlx5.rst
+++ b/doc/guides/vdpadevs/mlx5.rst
@@ -106,8 +106,40 @@ Run-time configuration
 
 - **ethtool** operations on related kernel interfaces also affect the PMD.
 
+Driver options
+^^^^^^^^^^^^^^
+
 - ``class`` parameter [string]
 
   Select the class of the driver that should probe the device.
   `vdpa` for the mlx5 vDPA driver.
 
+- ``event_mode`` parameter [int]
+
+  0: Completion queue scheduling will be managed by a timer thread which
+     automatically adjusts its delays to the incoming traffic rate.
+  1: Completion queue scheduling will be managed by a timer thread with fixed
+     delay time.
+  2: Completion queue scheduling will be managed by interrupts.
+     Each CQE burst arms the CQ in order to get an interrupt event in the next
+     traffic burst.
+
+     Default mode is 0.
+
+- ``event_us`` parameter [int]
+
+  Per-mode parameter, in microseconds:
+  0: A nonzero value to set the timer step in microseconds.
+     The timer thread's dynamic delay changes in steps of this value.
+     Default value is 50us.
+  1: A nonzero value to set fixed timer delay in micro-seconds.
+     Default value is 500us.
+  Relevant only for event mode 0 and 1.
+
+- ``no_traffic_time`` parameter [int]
+
+  A nonzero value defines the traffic off time, in seconds, that moves the
+  driver to no-traffic mode. In this mode the timer events are stopped and
+  interrupts are configured on the device in order to notify the driver of
+  traffic.
+  Default value is 2s.
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index 8b0b3b8..3fb5e2c 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -43,6 +43,7 @@
 
 #define MLX5_VDPA_MAX_RETRIES 20
 #define MLX5_VDPA_USEC 1000
+#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S 2LLU
 
 TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
 					      TAILQ_HEAD_INITIALIZER(priv_list);
@@ -605,6 +606,62 @@
 	return -rte_errno;
 }
 
+static int
+mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
+{
+	struct mlx5_vdpa_priv *priv = opaque;
+	unsigned long tmp;
+
+	if (strcmp(key, "class") == 0)
+		return 0;
+	errno = 0;
+	tmp = strtoul(val, NULL, 0);
+	if (errno) {
+		DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
+		return -errno;
+	}
+	if (strcmp(key, "event_mode") == 0) {
+		if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
+			priv->event_mode = (int)tmp;
+		else
+			DRV_LOG(WARNING, "Invalid event_mode %s.", val);
+	} else if (strcmp(key, "event_us") == 0) {
+		priv->event_us = (uint32_t)tmp;
+	} else if (strcmp(key, "no_traffic_time") == 0) {
+		priv->no_traffic_time_s = (uint32_t)tmp;
+	} else {
+		DRV_LOG(WARNING, "Invalid key %s.", key);
+	}
+	return 0;
+}
+
+static void
+mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv)
+{
+	struct rte_kvargs *kvlist;
+
+	priv->event_mode = MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER;
+	priv->event_us = 0;
+	priv->no_traffic_time_s = MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S;
+	if (devargs == NULL)
+		return;
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (kvlist == NULL)
+		return;
+	rte_kvargs_process(kvlist, NULL, mlx5_vdpa_args_check_handler, priv);
+	rte_kvargs_free(kvlist);
+	if (!priv->event_us) {
+		if (priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER)
+			priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
+		else if (priv->event_mode == MLX5_VDPA_EVENT_MODE_FIXED_TIMER)
+			priv->event_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
+	}
+	priv->timer_delay_us = priv->event_us;
+	DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
+	DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
+	DRV_LOG(DEBUG, "no traffic time is %u s.", priv->no_traffic_time_s);
+}
+
 /**
  * DPDK callback to register a PCI device.
  *
@@ -694,6 +751,7 @@
 		rte_errno = rte_errno ? rte_errno : EINVAL;
 		goto error;
 	}
+	mlx5_vdpa_config_get(pci_dev->device.devargs, priv);
 	SLIST_INIT(&priv->mr_list);
 	pthread_mutex_lock(&priv_list_lock);
 	TAILQ_INSERT_TAIL(&priv_list, priv, next);
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index ae1dcd8..c0228b2 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -35,6 +35,9 @@
 #define VIRTIO_F_RING_PACKED 34
 #endif
 
+#define MLX5_VDPA_DEFAULT_TIMER_DELAY_US 500u
+#define MLX5_VDPA_DEFAULT_TIMER_STEP_US 50
+
 struct mlx5_vdpa_cq {
 	uint16_t log_desc_n;
 	uint32_t cq_ci:24;
@@ -100,16 +103,26 @@ struct mlx5_vdpa_steer {
 	} rss[7];
 };
 
+enum {
+	MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER,
+	MLX5_VDPA_EVENT_MODE_FIXED_TIMER,
+	MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT
+};
+
 struct mlx5_vdpa_priv {
 	TAILQ_ENTRY(mlx5_vdpa_priv) next;
 	uint8_t configured;
 	uint8_t direct_notifier; /* Whether direct notifier is on or off. */
 	uint64_t last_traffic_tic;
+	uint32_t last_total;
 	pthread_t timer_tid;
 	pthread_mutex_t timer_lock;
 	pthread_cond_t timer_cond;
 	volatile uint8_t timer_on;
+	int event_mode;
+	uint32_t event_us;
 	uint32_t timer_delay_us;
+	uint32_t no_traffic_time_s;
 	int id; /* vDPA device id. */
 	int vid; /* vhost device id. */
 	struct ibv_context *ctx; /* Device context. */
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index 25f11fd..7e1204f 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -20,9 +20,6 @@
 #include "mlx5_vdpa.h"
 
 
-#define MLX5_VDPA_DEFAULT_TIMER_DELAY_US 500u
-#define MLX5_VDPA_NO_TRAFFIC_TIME_S 2LLU
-
 void
 mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
 {
@@ -175,7 +172,8 @@
 		rte_errno = errno;
 		goto error;
 	}
-	if (callfd != -1) {
+	if (callfd != -1 &&
+	    priv->event_mode != MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
 		ret = mlx5_glue->devx_subscribe_devx_event_fd(priv->eventc,
 							      callfd,
 							      cq->cq->obj, 0);
@@ -253,6 +251,25 @@
 	}
 }
 
+static void
+mlx5_vdpa_timer_delay(struct mlx5_vdpa_priv *priv, uint32_t total)
+{
+	if (priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER) {
+		if (total == 0 || priv->last_total == 0) {
+			priv->timer_delay_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
+		} else {
+			if (total > priv->last_total) {
+				if (priv->timer_delay_us > priv->event_us)
+					priv->timer_delay_us -= priv->event_us;
+			} else {
+				priv->timer_delay_us += priv->event_us;
+			}
+		}
+		priv->last_total = total;
+	}
+	usleep(priv->timer_delay_us);
+}
+
 static void *
 mlx5_vdpa_poll_handle(void *arg)
 {
@@ -286,12 +303,13 @@
 		if (!total) {
 			/* No traffic ? stop timer and load interrupts. */
 			if (current_tic - priv->last_traffic_tic >=
-			    rte_get_timer_hz() * MLX5_VDPA_NO_TRAFFIC_TIME_S) {
+			    rte_get_timer_hz() * priv->no_traffic_time_s) {
 				DRV_LOG(DEBUG, "Device %d traffic was stopped.",
 					priv->id);
 				mlx5_vdpa_arm_all_cqs(priv);
 				pthread_mutex_lock(&priv->timer_lock);
 				priv->timer_on = 0;
+				priv->last_total = 0;
 				while (!priv->timer_on)
 					pthread_cond_wait(&priv->timer_cond,
 							  &priv->timer_lock);
@@ -301,7 +319,7 @@
 		} else {
 			priv->last_traffic_tic = current_tic;
 		}
-		usleep(priv->timer_delay_us);
+		mlx5_vdpa_timer_delay(priv, total);
 	}
 	return NULL;
 }
@@ -327,6 +345,13 @@
 						   struct mlx5_vdpa_virtq, eqp);
 
 		mlx5_vdpa_cq_poll(cq);
+		if (priv->event_mode == MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
+			mlx5_vdpa_cq_arm(priv, cq);
+			/* Notify guest for descs consuming. */
+			if (cq->callfd != -1)
+				eventfd_write(cq->callfd, (eventfd_t)1);
+			return;
+		}
 		/* Don't arm again - timer will take control. */
 		DRV_LOG(DEBUG, "Device %d virtq %d cq %d event was captured."
 			" Timer is %s, cq ci is %u.\n", priv->id,
@@ -355,15 +380,16 @@
 	if (!priv->eventc)
 		/* All virtqs are in poll mode. */
 		return 0;
-	pthread_mutex_init(&priv->timer_lock, NULL);
-	pthread_cond_init(&priv->timer_cond, NULL);
-	priv->timer_on = 0;
-	priv->timer_delay_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
-	ret = pthread_create(&priv->timer_tid, NULL, mlx5_vdpa_poll_handle,
-			     (void *)priv);
-	if (ret) {
-		DRV_LOG(ERR, "Failed to create timer thread.");
-		return -1;
+	if (priv->event_mode != MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
+		pthread_mutex_init(&priv->timer_lock, NULL);
+		pthread_cond_init(&priv->timer_cond, NULL);
+		priv->timer_on = 0;
+		ret = pthread_create(&priv->timer_tid, NULL,
+				     mlx5_vdpa_poll_handle, (void *)priv);
+		if (ret) {
+			DRV_LOG(ERR, "Failed to create timer thread.");
+			return -1;
+		}
 	}
 	flags = fcntl(priv->eventc->fd, F_GETFL);
 	ret = fcntl(priv->eventc->fd, F_SETFL, flags | O_NONBLOCK);
-- 
1.8.3.1


  parent reply index

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-18 19:11 [dpdk-dev] [PATCH 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
2020-06-18 19:11 ` [dpdk-dev] [PATCH 1/3] vdpa/mlx5: optimize notification events Matan Azrad
2020-06-18 19:11 ` [dpdk-dev] [PATCH 2/3] vdpa/mlx5: optimize completion queue poll Matan Azrad
2020-06-18 19:11 ` [dpdk-dev] [PATCH 3/3] vdpa/mlx5: add traffic control device arguments Matan Azrad
2020-06-25 13:30 ` [dpdk-dev] [PATCH v2 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
2020-06-25 13:30   ` [dpdk-dev] [PATCH v2 1/3] vdpa/mlx5: optimize notification events Matan Azrad
2020-06-29  9:05     ` Maxime Coquelin
2020-06-25 13:30   ` [dpdk-dev] [PATCH v2 2/3] vdpa/mlx5: optimize completion queue poll Matan Azrad
2020-06-29  9:11     ` Maxime Coquelin
2020-06-25 13:30   ` Matan Azrad [this message]
2020-06-29  9:16     ` [dpdk-dev] [PATCH v2 3/3] vdpa/mlx5: control completion queue event mode Maxime Coquelin
2020-06-29 14:01   ` [dpdk-dev] [PATCH v3 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
2020-06-29 14:01     ` [dpdk-dev] [PATCH v3 1/3] vdpa/mlx5: optimize notification events Matan Azrad
2020-06-29 14:01     ` [dpdk-dev] [PATCH v3 2/3] vdpa/mlx5: optimize completion queue poll Matan Azrad
2020-06-29 14:01     ` [dpdk-dev] [PATCH v3 3/3] vdpa/mlx5: control completion queue event mode Matan Azrad
2020-06-29 17:24     ` [dpdk-dev] [PATCH v3 0/3] vdpa/mlx5: optimize cpu utilization Maxime Coquelin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1593091838-51869-4-git-send-email-matan@mellanox.com \
    --to=matan@mellanox.com \
    --cc=dev@dpdk.org \
    --cc=maxime.coquelin@redhat.com \
    --cc=viacheslavo@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

DPDK patches and discussions

Archives are clonable:
	git clone --mirror http://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ http://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev


Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/ public-inbox