DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 0/3] vdpa/mlx5: optimize cpu utilization
@ 2020-06-18 19:11 Matan Azrad
  2020-06-18 19:11 ` [dpdk-dev] [PATCH 1/3] vdpa/mlx5: optimize notification events Matan Azrad
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Matan Azrad @ 2020-06-18 19:11 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev

Use timer thread to schedule CQ polling instead of interrupts.
Optimize CQ polling.

Matan Azrad (3):
  vdpa/mlx5: optimize notification events
  vdpa/mlx5: optimize completion queue poll
  vdpa/mlx5: add traffic control device arguments

 doc/guides/vdpadevs/mlx5.rst        |  17 +++
 drivers/vdpa/mlx5/Makefile          |   1 +
 drivers/vdpa/mlx5/mlx5_vdpa.c       |  46 ++++++++
 drivers/vdpa/mlx5/mlx5_vdpa.h       |   8 ++
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 226 ++++++++++++++++++++++++++++--------
 5 files changed, 248 insertions(+), 50 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH 1/3] vdpa/mlx5: optimize notification events
  2020-06-18 19:11 [dpdk-dev] [PATCH 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
@ 2020-06-18 19:11 ` Matan Azrad
  2020-06-18 19:11 ` [dpdk-dev] [PATCH 2/3] vdpa/mlx5: optimize completion queue poll Matan Azrad
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 16+ messages in thread
From: Matan Azrad @ 2020-06-18 19:11 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev

When the virtio guest driver doesn't work with poll mode, the driver
creates event mechanism in order to schedule completion notifications
for each virtq burst traffic.

When traffic comes to a virtq, a CQE will be added to the virtq CQ by
the FW.
The driver requests an interrupt for the next CQE index, and when the
interrupt is triggered, the driver polls the CQ and notifies the guest by
writing to the virtq callfd.

According to the described method, the interrupts will be triggered for
each burst of traffic. The burst size depends on interrupt latency.

Interrupt management takes a lot of CPU cycles, and using it for each
traffic burst takes a big portion of CPU capacity.

When traffic is on, using timer for CQ poll scheduling instead of
interrupts saves a lot of CPU cycles.

Move CQ poll scheduling to be done by timer in case of running traffic.
Request interrupts only when traffic is off.

The timer scheduling management is done by a new dedicated thread that
uses a usleep command.

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/vdpa/mlx5/Makefile          |   1 +
 drivers/vdpa/mlx5/mlx5_vdpa.h       |   7 ++
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 175 ++++++++++++++++++++++++++++++------
 3 files changed, 157 insertions(+), 26 deletions(-)

diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile
index 91c89d6..8a1c2ea 100644
--- a/drivers/vdpa/mlx5/Makefile
+++ b/drivers/vdpa/mlx5/Makefile
@@ -31,6 +31,7 @@ CFLAGS += $(WERROR_FLAGS)
 CFLAGS += -Wno-strict-prototypes
 LDLIBS += -lrte_common_mlx5
 LDLIBS += -lrte_eal -lrte_vhost -lrte_kvargs -lrte_pci -lrte_bus_pci -lrte_sched
+LDLIBS += -pthread
 
 # A few warnings cannot be avoided in external headers.
 CFLAGS += -Wno-error=cast-qual
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index 80b4c4b..ae1dcd8 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -39,6 +39,7 @@ struct mlx5_vdpa_cq {
 	uint16_t log_desc_n;
 	uint32_t cq_ci:24;
 	uint32_t arm_sn:2;
+	uint32_t armed:1;
 	int callfd;
 	rte_spinlock_t sl;
 	struct mlx5_devx_obj *cq;
@@ -103,6 +104,12 @@ struct mlx5_vdpa_priv {
 	TAILQ_ENTRY(mlx5_vdpa_priv) next;
 	uint8_t configured;
 	uint8_t direct_notifier; /* Whether direct notifier is on or off. */
+	uint64_t last_traffic_tic;
+	pthread_t timer_tid;
+	pthread_mutex_t timer_lock;
+	pthread_cond_t timer_cond;
+	volatile uint8_t timer_on;
+	uint32_t timer_delay_us;
 	int id; /* vDPA device id. */
 	int vid; /* vhost device id. */
 	struct ibv_context *ctx; /* Device context. */
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index dd60150..69c8bf6 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -12,6 +12,7 @@
 #include <rte_atomic.h>
 #include <rte_common.h>
 #include <rte_io.h>
+#include <rte_alarm.h>
 
 #include <mlx5_common.h>
 
@@ -19,6 +20,9 @@
 #include "mlx5_vdpa.h"
 
 
+#define MLX5_VDPA_DEFAULT_TIMER_DELAY_US 500u
+#define MLX5_VDPA_NO_TRAFFIC_TIME_S 2LLU
+
 void
 mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
 {
@@ -26,10 +30,23 @@
 		mlx5_glue->devx_free_uar(priv->uar);
 		priv->uar = NULL;
 	}
+#ifdef HAVE_IBV_DEVX_EVENT
 	if (priv->eventc) {
+		union {
+			struct mlx5dv_devx_async_event_hdr event_resp;
+			uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr)
+									 + 128];
+		} out;
+
+		/* Clean all pending events. */
+		while (mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
+		       sizeof(out.buf)) >=
+		       (ssize_t)sizeof(out.event_resp.cookie))
+			;
 		mlx5_glue->devx_destroy_event_channel(priv->eventc);
 		priv->eventc = NULL;
 	}
+#endif
 	priv->eqn = 0;
 }
 
@@ -79,7 +96,7 @@
 	memset(cq, 0, sizeof(*cq));
 }
 
-static inline void
+static inline void __rte_unused
 mlx5_vdpa_cq_arm(struct mlx5_vdpa_priv *priv, struct mlx5_vdpa_cq *cq)
 {
 	uint32_t arm_sn = cq->arm_sn << MLX5_CQ_SQN_OFFSET;
@@ -100,6 +117,7 @@
 	*((uint32_t *)addr + 1) = db_be >> 32;
 #endif
 	cq->arm_sn++;
+	cq->armed = 1;
 }
 
 static int
@@ -157,6 +175,16 @@
 		rte_errno = errno;
 		goto error;
 	}
+	if (callfd != -1) {
+		ret = mlx5_glue->devx_subscribe_devx_event_fd(priv->eventc,
+							      callfd,
+							      cq->cq->obj, 0);
+		if (ret) {
+			DRV_LOG(ERR, "Failed to subscribe CQE event fd.");
+			rte_errno = errno;
+			goto error;
+		}
+	}
 	cq->callfd = callfd;
 	/* Init CQ to ones to be in HW owner in the start. */
 	memset((void *)(uintptr_t)cq->umem_buf, 0xFF, attr.db_umem_offset);
@@ -168,27 +196,27 @@
 	return -1;
 }
 
-static inline void __rte_unused
-mlx5_vdpa_cq_poll(struct mlx5_vdpa_priv *priv __rte_unused,
-		  struct mlx5_vdpa_cq *cq)
+static inline uint32_t
+mlx5_vdpa_cq_poll(struct mlx5_vdpa_cq *cq)
 {
 	struct mlx5_vdpa_event_qp *eqp =
 				container_of(cq, struct mlx5_vdpa_event_qp, cq);
 	const unsigned int cq_size = 1 << cq->log_desc_n;
 	const unsigned int cq_mask = cq_size - 1;
+	uint32_t total = 0;
 	int ret;
 
 	do {
-		volatile struct mlx5_cqe *cqe = cq->cqes + (cq->cq_ci &
-							    cq_mask);
+		volatile struct mlx5_cqe *cqe = cq->cqes + ((cq->cq_ci + total)
+							    & cq_mask);
 
-		ret = check_cqe(cqe, cq_size, cq->cq_ci);
+		ret = check_cqe(cqe, cq_size, cq->cq_ci + total);
 		switch (ret) {
 		case MLX5_CQE_STATUS_ERR:
 			cq->errors++;
 			/*fall-through*/
 		case MLX5_CQE_STATUS_SW_OWN:
-			cq->cq_ci++;
+			total++;
 			break;
 		case MLX5_CQE_STATUS_HW_OWN:
 		default:
@@ -196,21 +224,86 @@
 		}
 	} while (ret != MLX5_CQE_STATUS_HW_OWN);
 	rte_io_wmb();
+	cq->cq_ci += total;
 	/* Ring CQ doorbell record. */
 	cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
 	rte_io_wmb();
 	/* Ring SW QP doorbell record. */
 	eqp->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci + cq_size);
+	return total;
+}
+
+static void
+mlx5_vdpa_arm_all_cqs(struct mlx5_vdpa_priv *priv)
+{
+	struct mlx5_vdpa_cq *cq;
+	int i;
+
+	for (i = 0; i < priv->nr_virtqs; i++) {
+		cq = &priv->virtqs[i].eqp.cq;
+		if (cq->cq && !cq->armed)
+			mlx5_vdpa_cq_arm(priv, cq);
+	}
+}
+
+static void *
+mlx5_vdpa_poll_handle(void *arg)
+{
+	struct mlx5_vdpa_priv *priv = arg;
+	int i;
+	struct mlx5_vdpa_cq *cq;
+	uint32_t total;
+	uint64_t current_tic;
+
+	pthread_mutex_lock(&priv->timer_lock);
+	while (!priv->timer_on)
+		pthread_cond_wait(&priv->timer_cond, &priv->timer_lock);
+	pthread_mutex_unlock(&priv->timer_lock);
+	while (1) {
+		total = 0;
+		for (i = 0; i < priv->nr_virtqs; i++) {
+			cq = &priv->virtqs[i].eqp.cq;
+			if (cq->cq && !cq->armed) {
+				uint32_t comp = mlx5_vdpa_cq_poll(cq);
+
+				if (comp) {
+					/* Notify guest for descs consuming. */
+					if (cq->callfd != -1)
+						eventfd_write(cq->callfd,
+							      (eventfd_t)1);
+					total += comp;
+				}
+			}
+		}
+		current_tic = rte_rdtsc();
+		if (!total) {
+			/* No traffic ? stop timer and load interrupts. */
+			if (current_tic - priv->last_traffic_tic >=
+			    rte_get_timer_hz() * MLX5_VDPA_NO_TRAFFIC_TIME_S) {
+				DRV_LOG(DEBUG, "Device %d traffic was stopped.",
+					priv->id);
+				mlx5_vdpa_arm_all_cqs(priv);
+				pthread_mutex_lock(&priv->timer_lock);
+				priv->timer_on = 0;
+				while (!priv->timer_on)
+					pthread_cond_wait(&priv->timer_cond,
+							  &priv->timer_lock);
+				pthread_mutex_unlock(&priv->timer_lock);
+				continue;
+			}
+		} else {
+			priv->last_traffic_tic = current_tic;
+		}
+		usleep(priv->timer_delay_us);
+	}
+	return NULL;
 }
 
 static void
 mlx5_vdpa_interrupt_handler(void *cb_arg)
 {
-#ifndef HAVE_IBV_DEVX_EVENT
-	(void)cb_arg;
-	return;
-#else
 	struct mlx5_vdpa_priv *priv = cb_arg;
+#ifdef HAVE_IBV_DEVX_EVENT
 	union {
 		struct mlx5dv_devx_async_event_hdr event_resp;
 		uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
@@ -221,17 +314,29 @@
 				       (ssize_t)sizeof(out.event_resp.cookie)) {
 		struct mlx5_vdpa_cq *cq = (struct mlx5_vdpa_cq *)
 					       (uintptr_t)out.event_resp.cookie;
-		rte_spinlock_lock(&cq->sl);
-		mlx5_vdpa_cq_poll(priv, cq);
-		mlx5_vdpa_cq_arm(priv, cq);
-		if (cq->callfd != -1)
-			/* Notify guest for descriptors consuming. */
-			eventfd_write(cq->callfd, (eventfd_t)1);
-		rte_spinlock_unlock(&cq->sl);
-		DRV_LOG(DEBUG, "CQ %d event: new cq_ci = %u.", cq->cq->id,
-			cq->cq_ci);
+		struct mlx5_vdpa_event_qp *eqp = container_of(cq,
+						 struct mlx5_vdpa_event_qp, cq);
+		struct mlx5_vdpa_virtq *virtq = container_of(eqp,
+						   struct mlx5_vdpa_virtq, eqp);
+
+		mlx5_vdpa_cq_poll(cq);
+		/* Don't arm again - timer will take control. */
+		DRV_LOG(DEBUG, "Device %d virtq %d cq %d event was captured."
+			" Timer is %s, cq ci is %u.\n", priv->id,
+			(int)virtq->index, cq->cq->id, priv->timer_on ? "on" :
+			"off", cq->cq_ci);
+		cq->armed = 0;
+	}
+#endif
+
+	/* Traffic detected: make sure timer is on. */
+	priv->last_traffic_tic = rte_rdtsc();
+	pthread_mutex_lock(&priv->timer_lock);
+	if (!priv->timer_on) {
+		priv->timer_on = 1;
+		pthread_cond_signal(&priv->timer_cond);
 	}
-#endif /* HAVE_IBV_DEVX_ASYNC */
+	pthread_mutex_unlock(&priv->timer_lock);
 }
 
 int
@@ -243,12 +348,21 @@
 	if (!priv->eventc)
 		/* All virtqs are in poll mode. */
 		return 0;
+	pthread_mutex_init(&priv->timer_lock, NULL);
+	pthread_cond_init(&priv->timer_cond, NULL);
+	priv->timer_on = 0;
+	priv->timer_delay_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
+	ret = pthread_create(&priv->timer_tid, NULL, mlx5_vdpa_poll_handle,
+			     (void *)priv);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to create timer thread.");
+		return -1;
+	}
 	flags = fcntl(priv->eventc->fd, F_GETFL);
 	ret = fcntl(priv->eventc->fd, F_SETFL, flags | O_NONBLOCK);
 	if (ret) {
 		DRV_LOG(ERR, "Failed to change event channel FD.");
-		rte_errno = errno;
-		return -rte_errno;
+		goto error;
 	}
 	priv->intr_handle.fd = priv->eventc->fd;
 	priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
@@ -256,9 +370,12 @@
 				       mlx5_vdpa_interrupt_handler, priv)) {
 		priv->intr_handle.fd = 0;
 		DRV_LOG(ERR, "Failed to register CQE interrupt %d.", rte_errno);
-		return -rte_errno;
+		goto error;
 	}
 	return 0;
+error:
+	mlx5_vdpa_cqe_event_unset(priv);
+	return -1;
 }
 
 void
@@ -266,6 +383,7 @@
 {
 	int retries = MLX5_VDPA_INTR_RETRIES;
 	int ret = -EAGAIN;
+	void *status;
 
 	if (priv->intr_handle.fd) {
 		while (retries-- && ret == -EAGAIN) {
@@ -276,11 +394,16 @@
 				DRV_LOG(DEBUG, "Try again to unregister fd %d "
 					"of CQ interrupt, retries = %d.",
 					priv->intr_handle.fd, retries);
-				usleep(MLX5_VDPA_INTR_RETRIES_USEC);
+				rte_pause();
 			}
 		}
 		memset(&priv->intr_handle, 0, sizeof(priv->intr_handle));
 	}
+	if (priv->timer_tid) {
+		pthread_cancel(priv->timer_tid);
+		pthread_join(priv->timer_tid, &status);
+	}
+	priv->timer_tid = 0;
 }
 
 void
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH 2/3] vdpa/mlx5: optimize completion queue poll
  2020-06-18 19:11 [dpdk-dev] [PATCH 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
  2020-06-18 19:11 ` [dpdk-dev] [PATCH 1/3] vdpa/mlx5: optimize notification events Matan Azrad
@ 2020-06-18 19:11 ` Matan Azrad
  2020-06-18 19:11 ` [dpdk-dev] [PATCH 3/3] vdpa/mlx5: add traffic control device arguments Matan Azrad
  2020-06-25 13:30 ` [dpdk-dev] [PATCH v2 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
  3 siblings, 0 replies; 16+ messages in thread
From: Matan Azrad @ 2020-06-18 19:11 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev

The vDPA driver uses a CQ in order to know when traffic works were
completed by the HW.

Each traffic completion adds a CQE to the CQ.

When the vDPA driver detects CQEs in the CQ, it triggers the guest
notification for the corresponding queue and consumes all of them.

There is a collapse feature in the HW that configures the HW to write all
the CQEs to the first entry of the CQ.

Using this feature, the vDPA driver can read only the first CQE,
validate that the completion counter inside the CQE was changed and if
so, to notify the guest.

Use CQ collapse feature in order to improve the poll utilization.

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 73 ++++++++++++++++++++-----------------
 1 file changed, 40 insertions(+), 33 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index 69c8bf6..25f11fd 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -127,12 +127,12 @@
 	struct mlx5_devx_cq_attr attr;
 	size_t pgsize = sysconf(_SC_PAGESIZE);
 	uint32_t umem_size;
-	int ret;
 	uint16_t event_nums[1] = {0};
+	uint16_t cq_size = 1 << log_desc_n;
+	int ret;
 
 	cq->log_desc_n = log_desc_n;
-	umem_size = sizeof(struct mlx5_cqe) * (1 << log_desc_n) +
-							sizeof(*cq->db_rec) * 2;
+	umem_size = sizeof(struct mlx5_cqe) * cq_size + sizeof(*cq->db_rec) * 2;
 	cq->umem_buf = rte_zmalloc(__func__, umem_size, 4096);
 	if (!cq->umem_buf) {
 		DRV_LOG(ERR, "Failed to allocate memory for CQ.");
@@ -149,13 +149,13 @@
 	}
 	attr.q_umem_valid = 1;
 	attr.db_umem_valid = 1;
-	attr.use_first_only = 0;
+	attr.use_first_only = 1;
 	attr.overrun_ignore = 0;
 	attr.uar_page_id = priv->uar->page_id;
 	attr.q_umem_id = cq->umem_obj->umem_id;
 	attr.q_umem_offset = 0;
 	attr.db_umem_id = cq->umem_obj->umem_id;
-	attr.db_umem_offset = sizeof(struct mlx5_cqe) * (1 << log_desc_n);
+	attr.db_umem_offset = sizeof(struct mlx5_cqe) * cq_size;
 	attr.eqn = priv->eqn;
 	attr.log_cq_size = log_desc_n;
 	attr.log_page_size = rte_log2_u32(pgsize);
@@ -187,7 +187,8 @@
 	}
 	cq->callfd = callfd;
 	/* Init CQ to ones to be in HW owner in the start. */
-	memset((void *)(uintptr_t)cq->umem_buf, 0xFF, attr.db_umem_offset);
+	cq->cqes[0].op_own = MLX5_CQE_OWNER_MASK;
+	cq->cqes[0].wqe_counter = rte_cpu_to_be_16(cq_size - 1);
 	/* First arming. */
 	mlx5_vdpa_cq_arm(priv, cq);
 	return 0;
@@ -203,34 +204,40 @@
 				container_of(cq, struct mlx5_vdpa_event_qp, cq);
 	const unsigned int cq_size = 1 << cq->log_desc_n;
 	const unsigned int cq_mask = cq_size - 1;
-	uint32_t total = 0;
-	int ret;
-
-	do {
-		volatile struct mlx5_cqe *cqe = cq->cqes + ((cq->cq_ci + total)
-							    & cq_mask);
-
-		ret = check_cqe(cqe, cq_size, cq->cq_ci + total);
-		switch (ret) {
-		case MLX5_CQE_STATUS_ERR:
+	union {
+		struct {
+			uint16_t wqe_counter;
+			uint8_t rsvd5;
+			uint8_t op_own;
+		};
+		uint32_t word;
+	} last_word;
+	uint16_t next_wqe_counter = cq->cq_ci & cq_mask;
+	uint16_t cur_wqe_counter;
+	uint16_t comp;
+
+	last_word.word = rte_read32(&cq->cqes[0].wqe_counter);
+	cur_wqe_counter = rte_be_to_cpu_16(last_word.wqe_counter);
+	comp = (cur_wqe_counter + 1u - next_wqe_counter) & cq_mask;
+	if (comp) {
+		cq->cq_ci += comp;
+		MLX5_ASSERT(!!(cq->cq_ci & cq_size) ==
+			    MLX5_CQE_OWNER(last_word.op_own));
+		MLX5_ASSERT(MLX5_CQE_OPCODE(last_word.op_own) !=
+			    MLX5_CQE_INVALID);
+		if (unlikely(!(MLX5_CQE_OPCODE(last_word.op_own) ==
+			       MLX5_CQE_RESP_ERR ||
+			       MLX5_CQE_OPCODE(last_word.op_own) ==
+			       MLX5_CQE_REQ_ERR)))
 			cq->errors++;
-			/*fall-through*/
-		case MLX5_CQE_STATUS_SW_OWN:
-			total++;
-			break;
-		case MLX5_CQE_STATUS_HW_OWN:
-		default:
-			break;
-		}
-	} while (ret != MLX5_CQE_STATUS_HW_OWN);
-	rte_io_wmb();
-	cq->cq_ci += total;
-	/* Ring CQ doorbell record. */
-	cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
-	rte_io_wmb();
-	/* Ring SW QP doorbell record. */
-	eqp->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci + cq_size);
-	return total;
+		rte_io_wmb();
+		/* Ring CQ doorbell record. */
+		cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
+		rte_io_wmb();
+		/* Ring SW QP doorbell record. */
+		eqp->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci + cq_size);
+	}
+	return comp;
 }
 
 static void
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH 3/3] vdpa/mlx5: add traffic control device arguments
  2020-06-18 19:11 [dpdk-dev] [PATCH 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
  2020-06-18 19:11 ` [dpdk-dev] [PATCH 1/3] vdpa/mlx5: optimize notification events Matan Azrad
  2020-06-18 19:11 ` [dpdk-dev] [PATCH 2/3] vdpa/mlx5: optimize completion queue poll Matan Azrad
@ 2020-06-18 19:11 ` Matan Azrad
  2020-06-25 13:30 ` [dpdk-dev] [PATCH v2 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
  3 siblings, 0 replies; 16+ messages in thread
From: Matan Azrad @ 2020-06-18 19:11 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev

Add 2 device arguments to control traffic modes:
1. Control the CQ polling timer frequency when traffic is on.
2. Control the non-traffic time which moves the timer to be off.

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 doc/guides/vdpadevs/mlx5.rst        | 17 ++++++++++++++
 drivers/vdpa/mlx5/mlx5_vdpa.c       | 46 +++++++++++++++++++++++++++++++++++++
 drivers/vdpa/mlx5/mlx5_vdpa.h       |  1 +
 drivers/vdpa/mlx5/mlx5_vdpa_event.c |  6 +----
 4 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/doc/guides/vdpadevs/mlx5.rst b/doc/guides/vdpadevs/mlx5.rst
index dd377af..05f26e0 100644
--- a/doc/guides/vdpadevs/mlx5.rst
+++ b/doc/guides/vdpadevs/mlx5.rst
@@ -106,8 +106,25 @@ Run-time configuration
 
 - **ethtool** operations on related kernel interfaces also affect the PMD.
 
+Driver options
+^^^^^^^^^^^^^^
+
 - ``class`` parameter [string]
 
   Select the class of the driver that should probe the device.
   `vdpa` for the mlx5 vDPA driver.
 
+- ``timer_delay`` parameter [int]
+
+  A nonzero value allows to configure the internal timer delay in micro-seconds.
+  The internal timer event causes polling of all the CQs, so this number may
+  affect the performance.
+  Default value is 500us.
+
+- ``no_traffic_time`` parameter [int]
+
+  A nonzero value defines the traffic off time, in seconds, that moves the
+  driver to no-traffic mode. In this mode the timer events are stopped and
+  interrupts are configured to the device in order to notify traffic for the
+  driver.
+  Default value is 2s.
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index 8b0b3b8..9e758b6 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -43,6 +43,8 @@
 
 #define MLX5_VDPA_MAX_RETRIES 20
 #define MLX5_VDPA_USEC 1000
+#define MLX5_VDPA_DEFAULT_TIMER_DELAY_US 500u
+#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S 2LLU
 
 TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
 					      TAILQ_HEAD_INITIALIZER(priv_list);
@@ -605,6 +607,49 @@
 	return -rte_errno;
 }
 
+static int
+mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
+{
+	struct mlx5_vdpa_priv *priv = opaque;
+	unsigned long tmp;
+
+	if (strcmp(key, "class") == 0)
+		return 0;
+	errno = 0;
+	tmp = strtoul(val, NULL, 0);
+	if (errno) {
+		DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
+		return -errno;
+	}
+	if (strcmp(key, "timer_delay") == 0) {
+		priv->timer_delay_us = (uint32_t)tmp;
+	} else if (strcmp(key, "no_traffic_time") == 0) {
+		priv->no_traffic_time_s = (uint32_t)tmp;
+	} else {
+		DRV_LOG(WARNING, "Invalid key %s.", key);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void
+mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv)
+{
+	struct rte_kvargs *kvlist;
+
+	priv->timer_delay_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
+	priv->no_traffic_time_s = MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S;
+	if (devargs == NULL)
+		return;
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (kvlist == NULL)
+		return;
+	rte_kvargs_process(kvlist, NULL, mlx5_vdpa_args_check_handler, priv);
+	rte_kvargs_free(kvlist);
+	DRV_LOG(DEBUG, "timer delay is %u us.", priv->timer_delay_us);
+	DRV_LOG(DEBUG, "no traffic time is %u s.", priv->no_traffic_time_s);
+}
+
 /**
  * DPDK callback to register a PCI device.
  *
@@ -694,6 +739,7 @@
 		rte_errno = rte_errno ? rte_errno : EINVAL;
 		goto error;
 	}
+	mlx5_vdpa_config_get(pci_dev->device.devargs, priv);
 	SLIST_INIT(&priv->mr_list);
 	pthread_mutex_lock(&priv_list_lock);
 	TAILQ_INSERT_TAIL(&priv_list, priv, next);
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index ae1dcd8..28ec0be 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -110,6 +110,7 @@ struct mlx5_vdpa_priv {
 	pthread_cond_t timer_cond;
 	volatile uint8_t timer_on;
 	uint32_t timer_delay_us;
+	uint32_t no_traffic_time_s;
 	int id; /* vDPA device id. */
 	int vid; /* vhost device id. */
 	struct ibv_context *ctx; /* Device context. */
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index 25f11fd..06a373a 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -20,9 +20,6 @@
 #include "mlx5_vdpa.h"
 
 
-#define MLX5_VDPA_DEFAULT_TIMER_DELAY_US 500u
-#define MLX5_VDPA_NO_TRAFFIC_TIME_S 2LLU
-
 void
 mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
 {
@@ -286,7 +283,7 @@
 		if (!total) {
 			/* No traffic ? stop timer and load interrupts. */
 			if (current_tic - priv->last_traffic_tic >=
-			    rte_get_timer_hz() * MLX5_VDPA_NO_TRAFFIC_TIME_S) {
+			    rte_get_timer_hz() * priv->no_traffic_time_s) {
 				DRV_LOG(DEBUG, "Device %d traffic was stopped.",
 					priv->id);
 				mlx5_vdpa_arm_all_cqs(priv);
@@ -358,7 +355,6 @@
 	pthread_mutex_init(&priv->timer_lock, NULL);
 	pthread_cond_init(&priv->timer_cond, NULL);
 	priv->timer_on = 0;
-	priv->timer_delay_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
 	ret = pthread_create(&priv->timer_tid, NULL, mlx5_vdpa_poll_handle,
 			     (void *)priv);
 	if (ret) {
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v2 0/3] vdpa/mlx5: optimize cpu utilization
  2020-06-18 19:11 [dpdk-dev] [PATCH 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
                   ` (2 preceding siblings ...)
  2020-06-18 19:11 ` [dpdk-dev] [PATCH 3/3] vdpa/mlx5: add traffic control device arguments Matan Azrad
@ 2020-06-25 13:30 ` Matan Azrad
  2020-06-25 13:30   ` [dpdk-dev] [PATCH v2 1/3] vdpa/mlx5: optimize notification events Matan Azrad
                     ` (3 more replies)
  3 siblings, 4 replies; 16+ messages in thread
From: Matan Azrad @ 2020-06-25 13:30 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev, Viacheslav Ovsiienko

Defines new event modes in order to schedule CQ polling.

Timer thread is created in mode 0 and 1 in order to save CPU utilization.

v2:
Add devargs to control the events mode.


Matan Azrad (3):
  vdpa/mlx5: optimize notification events
  vdpa/mlx5: optimize completion queue poll
  vdpa/mlx5: control completion queue event mode

 doc/guides/vdpadevs/mlx5.rst        |  32 +++++
 drivers/vdpa/mlx5/Makefile          |   1 +
 drivers/vdpa/mlx5/mlx5_vdpa.c       |  58 ++++++++
 drivers/vdpa/mlx5/mlx5_vdpa.h       |  20 +++
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 256 +++++++++++++++++++++++++++++-------
 5 files changed, 317 insertions(+), 50 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v2 1/3] vdpa/mlx5: optimize notification events
  2020-06-25 13:30 ` [dpdk-dev] [PATCH v2 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
@ 2020-06-25 13:30   ` Matan Azrad
  2020-06-29  9:05     ` Maxime Coquelin
  2020-06-25 13:30   ` [dpdk-dev] [PATCH v2 2/3] vdpa/mlx5: optimize completion queue poll Matan Azrad
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 16+ messages in thread
From: Matan Azrad @ 2020-06-25 13:30 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev, Viacheslav Ovsiienko

When the virtio guest driver doesn't work with poll mode, the driver
creates event mechanism in order to schedule completion notifications
for each virtq burst traffic.

When traffic comes to a virtq, a CQE will be added to the virtq CQ by
the FW.
The driver requests interrupt for the next CQE index, and when interrupt
is triggered, the driver polls the CQ and notifies the guest by virtq
callfd writing.

According to the described method, the interrupts will be triggered for
each burst of traffic. The burst size depends on interrupt latency.

Interrupt management takes a lot of CPU cycles, and using it for each
traffic burst takes a big portion of CPU capacity.

When traffic is on, using timer for CQ poll scheduling instead of
interrupts saves a lot of CPU cycles.

Move CQ poll scheduling to be done by timer in case of running traffic.
Request interrupts only when traffic is off.

The timer scheduling management is done by a new dedicated thread that
uses a usleep command.

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/vdpa/mlx5/Makefile          |   1 +
 drivers/vdpa/mlx5/mlx5_vdpa.h       |   7 ++
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 175 ++++++++++++++++++++++++++++++------
 3 files changed, 157 insertions(+), 26 deletions(-)

diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile
index 91c89d6..8a1c2ea 100644
--- a/drivers/vdpa/mlx5/Makefile
+++ b/drivers/vdpa/mlx5/Makefile
@@ -31,6 +31,7 @@ CFLAGS += $(WERROR_FLAGS)
 CFLAGS += -Wno-strict-prototypes
 LDLIBS += -lrte_common_mlx5
 LDLIBS += -lrte_eal -lrte_vhost -lrte_kvargs -lrte_pci -lrte_bus_pci -lrte_sched
+LDLIBS += -pthread
 
 # A few warnings cannot be avoided in external headers.
 CFLAGS += -Wno-error=cast-qual
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index 80b4c4b..ae1dcd8 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -39,6 +39,7 @@ struct mlx5_vdpa_cq {
 	uint16_t log_desc_n;
 	uint32_t cq_ci:24;
 	uint32_t arm_sn:2;
+	uint32_t armed:1;
 	int callfd;
 	rte_spinlock_t sl;
 	struct mlx5_devx_obj *cq;
@@ -103,6 +104,12 @@ struct mlx5_vdpa_priv {
 	TAILQ_ENTRY(mlx5_vdpa_priv) next;
 	uint8_t configured;
 	uint8_t direct_notifier; /* Whether direct notifier is on or off. */
+	uint64_t last_traffic_tic;
+	pthread_t timer_tid;
+	pthread_mutex_t timer_lock;
+	pthread_cond_t timer_cond;
+	volatile uint8_t timer_on;
+	uint32_t timer_delay_us;
 	int id; /* vDPA device id. */
 	int vid; /* vhost device id. */
 	struct ibv_context *ctx; /* Device context. */
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index dd60150..69c8bf6 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -12,6 +12,7 @@
 #include <rte_atomic.h>
 #include <rte_common.h>
 #include <rte_io.h>
+#include <rte_alarm.h>
 
 #include <mlx5_common.h>
 
@@ -19,6 +20,9 @@
 #include "mlx5_vdpa.h"
 
 
+#define MLX5_VDPA_DEFAULT_TIMER_DELAY_US 500u
+#define MLX5_VDPA_NO_TRAFFIC_TIME_S 2LLU
+
 void
 mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
 {
@@ -26,10 +30,23 @@
 		mlx5_glue->devx_free_uar(priv->uar);
 		priv->uar = NULL;
 	}
+#ifdef HAVE_IBV_DEVX_EVENT
 	if (priv->eventc) {
+		union {
+			struct mlx5dv_devx_async_event_hdr event_resp;
+			uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr)
+									 + 128];
+		} out;
+
+		/* Clean all pending events. */
+		while (mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
+		       sizeof(out.buf)) >=
+		       (ssize_t)sizeof(out.event_resp.cookie))
+			;
 		mlx5_glue->devx_destroy_event_channel(priv->eventc);
 		priv->eventc = NULL;
 	}
+#endif
 	priv->eqn = 0;
 }
 
@@ -79,7 +96,7 @@
 	memset(cq, 0, sizeof(*cq));
 }
 
-static inline void
+static inline void __rte_unused
 mlx5_vdpa_cq_arm(struct mlx5_vdpa_priv *priv, struct mlx5_vdpa_cq *cq)
 {
 	uint32_t arm_sn = cq->arm_sn << MLX5_CQ_SQN_OFFSET;
@@ -100,6 +117,7 @@
 	*((uint32_t *)addr + 1) = db_be >> 32;
 #endif
 	cq->arm_sn++;
+	cq->armed = 1;
 }
 
 static int
@@ -157,6 +175,16 @@
 		rte_errno = errno;
 		goto error;
 	}
+	if (callfd != -1) {
+		ret = mlx5_glue->devx_subscribe_devx_event_fd(priv->eventc,
+							      callfd,
+							      cq->cq->obj, 0);
+		if (ret) {
+			DRV_LOG(ERR, "Failed to subscribe CQE event fd.");
+			rte_errno = errno;
+			goto error;
+		}
+	}
 	cq->callfd = callfd;
 	/* Init CQ to ones to be in HW owner in the start. */
 	memset((void *)(uintptr_t)cq->umem_buf, 0xFF, attr.db_umem_offset);
@@ -168,27 +196,27 @@
 	return -1;
 }
 
-static inline void __rte_unused
-mlx5_vdpa_cq_poll(struct mlx5_vdpa_priv *priv __rte_unused,
-		  struct mlx5_vdpa_cq *cq)
+static inline uint32_t
+mlx5_vdpa_cq_poll(struct mlx5_vdpa_cq *cq)
 {
 	struct mlx5_vdpa_event_qp *eqp =
 				container_of(cq, struct mlx5_vdpa_event_qp, cq);
 	const unsigned int cq_size = 1 << cq->log_desc_n;
 	const unsigned int cq_mask = cq_size - 1;
+	uint32_t total = 0;
 	int ret;
 
 	do {
-		volatile struct mlx5_cqe *cqe = cq->cqes + (cq->cq_ci &
-							    cq_mask);
+		volatile struct mlx5_cqe *cqe = cq->cqes + ((cq->cq_ci + total)
+							    & cq_mask);
 
-		ret = check_cqe(cqe, cq_size, cq->cq_ci);
+		ret = check_cqe(cqe, cq_size, cq->cq_ci + total);
 		switch (ret) {
 		case MLX5_CQE_STATUS_ERR:
 			cq->errors++;
 			/*fall-through*/
 		case MLX5_CQE_STATUS_SW_OWN:
-			cq->cq_ci++;
+			total++;
 			break;
 		case MLX5_CQE_STATUS_HW_OWN:
 		default:
@@ -196,21 +224,86 @@
 		}
 	} while (ret != MLX5_CQE_STATUS_HW_OWN);
 	rte_io_wmb();
+	cq->cq_ci += total;
 	/* Ring CQ doorbell record. */
 	cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
 	rte_io_wmb();
 	/* Ring SW QP doorbell record. */
 	eqp->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci + cq_size);
+	return total;
+}
+
+static void
+mlx5_vdpa_arm_all_cqs(struct mlx5_vdpa_priv *priv)
+{
+	struct mlx5_vdpa_cq *cq;
+	int i;
+
+	for (i = 0; i < priv->nr_virtqs; i++) {
+		cq = &priv->virtqs[i].eqp.cq;
+		if (cq->cq && !cq->armed)
+			mlx5_vdpa_cq_arm(priv, cq);
+	}
+}
+
+static void *
+mlx5_vdpa_poll_handle(void *arg)
+{
+	struct mlx5_vdpa_priv *priv = arg;
+	int i;
+	struct mlx5_vdpa_cq *cq;
+	uint32_t total;
+	uint64_t current_tic;
+
+	pthread_mutex_lock(&priv->timer_lock);
+	while (!priv->timer_on)
+		pthread_cond_wait(&priv->timer_cond, &priv->timer_lock);
+	pthread_mutex_unlock(&priv->timer_lock);
+	while (1) {
+		total = 0;
+		for (i = 0; i < priv->nr_virtqs; i++) {
+			cq = &priv->virtqs[i].eqp.cq;
+			if (cq->cq && !cq->armed) {
+				uint32_t comp = mlx5_vdpa_cq_poll(cq);
+
+				if (comp) {
+					/* Notify guest for descs consuming. */
+					if (cq->callfd != -1)
+						eventfd_write(cq->callfd,
+							      (eventfd_t)1);
+					total += comp;
+				}
+			}
+		}
+		current_tic = rte_rdtsc();
+		if (!total) {
+			/* No traffic ? stop timer and load interrupts. */
+			if (current_tic - priv->last_traffic_tic >=
+			    rte_get_timer_hz() * MLX5_VDPA_NO_TRAFFIC_TIME_S) {
+				DRV_LOG(DEBUG, "Device %d traffic was stopped.",
+					priv->id);
+				mlx5_vdpa_arm_all_cqs(priv);
+				pthread_mutex_lock(&priv->timer_lock);
+				priv->timer_on = 0;
+				while (!priv->timer_on)
+					pthread_cond_wait(&priv->timer_cond,
+							  &priv->timer_lock);
+				pthread_mutex_unlock(&priv->timer_lock);
+				continue;
+			}
+		} else {
+			priv->last_traffic_tic = current_tic;
+		}
+		usleep(priv->timer_delay_us);
+	}
+	return NULL;
 }
 
 static void
 mlx5_vdpa_interrupt_handler(void *cb_arg)
 {
-#ifndef HAVE_IBV_DEVX_EVENT
-	(void)cb_arg;
-	return;
-#else
 	struct mlx5_vdpa_priv *priv = cb_arg;
+#ifdef HAVE_IBV_DEVX_EVENT
 	union {
 		struct mlx5dv_devx_async_event_hdr event_resp;
 		uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
@@ -221,17 +314,29 @@
 				       (ssize_t)sizeof(out.event_resp.cookie)) {
 		struct mlx5_vdpa_cq *cq = (struct mlx5_vdpa_cq *)
 					       (uintptr_t)out.event_resp.cookie;
-		rte_spinlock_lock(&cq->sl);
-		mlx5_vdpa_cq_poll(priv, cq);
-		mlx5_vdpa_cq_arm(priv, cq);
-		if (cq->callfd != -1)
-			/* Notify guest for descriptors consuming. */
-			eventfd_write(cq->callfd, (eventfd_t)1);
-		rte_spinlock_unlock(&cq->sl);
-		DRV_LOG(DEBUG, "CQ %d event: new cq_ci = %u.", cq->cq->id,
-			cq->cq_ci);
+		struct mlx5_vdpa_event_qp *eqp = container_of(cq,
+						 struct mlx5_vdpa_event_qp, cq);
+		struct mlx5_vdpa_virtq *virtq = container_of(eqp,
+						   struct mlx5_vdpa_virtq, eqp);
+
+		mlx5_vdpa_cq_poll(cq);
+		/* Don't arm again - timer will take control. */
+		DRV_LOG(DEBUG, "Device %d virtq %d cq %d event was captured."
+			" Timer is %s, cq ci is %u.\n", priv->id,
+			(int)virtq->index, cq->cq->id, priv->timer_on ? "on" :
+			"off", cq->cq_ci);
+		cq->armed = 0;
+	}
+#endif
+
+	/* Traffic detected: make sure timer is on. */
+	priv->last_traffic_tic = rte_rdtsc();
+	pthread_mutex_lock(&priv->timer_lock);
+	if (!priv->timer_on) {
+		priv->timer_on = 1;
+		pthread_cond_signal(&priv->timer_cond);
 	}
-#endif /* HAVE_IBV_DEVX_ASYNC */
+	pthread_mutex_unlock(&priv->timer_lock);
 }
 
 int
@@ -243,12 +348,21 @@
 	if (!priv->eventc)
 		/* All virtqs are in poll mode. */
 		return 0;
+	pthread_mutex_init(&priv->timer_lock, NULL);
+	pthread_cond_init(&priv->timer_cond, NULL);
+	priv->timer_on = 0;
+	priv->timer_delay_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
+	ret = pthread_create(&priv->timer_tid, NULL, mlx5_vdpa_poll_handle,
+			     (void *)priv);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to create timer thread.");
+		return -1;
+	}
 	flags = fcntl(priv->eventc->fd, F_GETFL);
 	ret = fcntl(priv->eventc->fd, F_SETFL, flags | O_NONBLOCK);
 	if (ret) {
 		DRV_LOG(ERR, "Failed to change event channel FD.");
-		rte_errno = errno;
-		return -rte_errno;
+		goto error;
 	}
 	priv->intr_handle.fd = priv->eventc->fd;
 	priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
@@ -256,9 +370,12 @@
 				       mlx5_vdpa_interrupt_handler, priv)) {
 		priv->intr_handle.fd = 0;
 		DRV_LOG(ERR, "Failed to register CQE interrupt %d.", rte_errno);
-		return -rte_errno;
+		goto error;
 	}
 	return 0;
+error:
+	mlx5_vdpa_cqe_event_unset(priv);
+	return -1;
 }
 
 void
@@ -266,6 +383,7 @@
 {
 	int retries = MLX5_VDPA_INTR_RETRIES;
 	int ret = -EAGAIN;
+	void *status;
 
 	if (priv->intr_handle.fd) {
 		while (retries-- && ret == -EAGAIN) {
@@ -276,11 +394,16 @@
 				DRV_LOG(DEBUG, "Try again to unregister fd %d "
 					"of CQ interrupt, retries = %d.",
 					priv->intr_handle.fd, retries);
-				usleep(MLX5_VDPA_INTR_RETRIES_USEC);
+				rte_pause();
 			}
 		}
 		memset(&priv->intr_handle, 0, sizeof(priv->intr_handle));
 	}
+	if (priv->timer_tid) {
+		pthread_cancel(priv->timer_tid);
+		pthread_join(priv->timer_tid, &status);
+	}
+	priv->timer_tid = 0;
 }
 
 void
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v2 2/3] vdpa/mlx5: optimize completion queue poll
  2020-06-25 13:30 ` [dpdk-dev] [PATCH v2 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
  2020-06-25 13:30   ` [dpdk-dev] [PATCH v2 1/3] vdpa/mlx5: optimize notification events Matan Azrad
@ 2020-06-25 13:30   ` Matan Azrad
  2020-06-29  9:11     ` Maxime Coquelin
  2020-06-25 13:30   ` [dpdk-dev] [PATCH v2 3/3] vdpa/mlx5: control completion queue event mode Matan Azrad
  2020-06-29 14:01   ` [dpdk-dev] [PATCH v3 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
  3 siblings, 1 reply; 16+ messages in thread
From: Matan Azrad @ 2020-06-25 13:30 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev, Viacheslav Ovsiienko

The vDPA driver uses a CQ in order to know when traffic works were
completed by the HW.

Each traffic burst completion adds a CQE to the CQ.

When the vDPA driver detects CQEs in the CQ, it triggers the guest
notification for the corresponding queue and consumes all of them.

There is a collapse feature in the HW that configures it to write all the
CQEs into the first entry of the CQ.

Using this feature, the vDPA driver can read only the first CQE,
check whether the completion counter inside the CQE has changed and, if
so, notify the guest.

Use CQ collapse feature in order to improve the poll utilization.

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 73 ++++++++++++++++++++-----------------
 1 file changed, 40 insertions(+), 33 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index 69c8bf6..25f11fd 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -127,12 +127,12 @@
 	struct mlx5_devx_cq_attr attr;
 	size_t pgsize = sysconf(_SC_PAGESIZE);
 	uint32_t umem_size;
-	int ret;
 	uint16_t event_nums[1] = {0};
+	uint16_t cq_size = 1 << log_desc_n;
+	int ret;
 
 	cq->log_desc_n = log_desc_n;
-	umem_size = sizeof(struct mlx5_cqe) * (1 << log_desc_n) +
-							sizeof(*cq->db_rec) * 2;
+	umem_size = sizeof(struct mlx5_cqe) * cq_size + sizeof(*cq->db_rec) * 2;
 	cq->umem_buf = rte_zmalloc(__func__, umem_size, 4096);
 	if (!cq->umem_buf) {
 		DRV_LOG(ERR, "Failed to allocate memory for CQ.");
@@ -149,13 +149,13 @@
 	}
 	attr.q_umem_valid = 1;
 	attr.db_umem_valid = 1;
-	attr.use_first_only = 0;
+	attr.use_first_only = 1;
 	attr.overrun_ignore = 0;
 	attr.uar_page_id = priv->uar->page_id;
 	attr.q_umem_id = cq->umem_obj->umem_id;
 	attr.q_umem_offset = 0;
 	attr.db_umem_id = cq->umem_obj->umem_id;
-	attr.db_umem_offset = sizeof(struct mlx5_cqe) * (1 << log_desc_n);
+	attr.db_umem_offset = sizeof(struct mlx5_cqe) * cq_size;
 	attr.eqn = priv->eqn;
 	attr.log_cq_size = log_desc_n;
 	attr.log_page_size = rte_log2_u32(pgsize);
@@ -187,7 +187,8 @@
 	}
 	cq->callfd = callfd;
 	/* Init CQ to ones to be in HW owner in the start. */
-	memset((void *)(uintptr_t)cq->umem_buf, 0xFF, attr.db_umem_offset);
+	cq->cqes[0].op_own = MLX5_CQE_OWNER_MASK;
+	cq->cqes[0].wqe_counter = rte_cpu_to_be_16(cq_size - 1);
 	/* First arming. */
 	mlx5_vdpa_cq_arm(priv, cq);
 	return 0;
@@ -203,34 +204,40 @@
 				container_of(cq, struct mlx5_vdpa_event_qp, cq);
 	const unsigned int cq_size = 1 << cq->log_desc_n;
 	const unsigned int cq_mask = cq_size - 1;
-	uint32_t total = 0;
-	int ret;
-
-	do {
-		volatile struct mlx5_cqe *cqe = cq->cqes + ((cq->cq_ci + total)
-							    & cq_mask);
-
-		ret = check_cqe(cqe, cq_size, cq->cq_ci + total);
-		switch (ret) {
-		case MLX5_CQE_STATUS_ERR:
+	union {
+		struct {
+			uint16_t wqe_counter;
+			uint8_t rsvd5;
+			uint8_t op_own;
+		};
+		uint32_t word;
+	} last_word;
+	uint16_t next_wqe_counter = cq->cq_ci & cq_mask;
+	uint16_t cur_wqe_counter;
+	uint16_t comp;
+
+	last_word.word = rte_read32(&cq->cqes[0].wqe_counter);
+	cur_wqe_counter = rte_be_to_cpu_16(last_word.wqe_counter);
+	comp = (cur_wqe_counter + 1u - next_wqe_counter) & cq_mask;
+	if (comp) {
+		cq->cq_ci += comp;
+		MLX5_ASSERT(!!(cq->cq_ci & cq_size) ==
+			    MLX5_CQE_OWNER(last_word.op_own));
+		MLX5_ASSERT(MLX5_CQE_OPCODE(last_word.op_own) !=
+			    MLX5_CQE_INVALID);
+		if (unlikely(!(MLX5_CQE_OPCODE(last_word.op_own) ==
+			       MLX5_CQE_RESP_ERR ||
+			       MLX5_CQE_OPCODE(last_word.op_own) ==
+			       MLX5_CQE_REQ_ERR)))
 			cq->errors++;
-			/*fall-through*/
-		case MLX5_CQE_STATUS_SW_OWN:
-			total++;
-			break;
-		case MLX5_CQE_STATUS_HW_OWN:
-		default:
-			break;
-		}
-	} while (ret != MLX5_CQE_STATUS_HW_OWN);
-	rte_io_wmb();
-	cq->cq_ci += total;
-	/* Ring CQ doorbell record. */
-	cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
-	rte_io_wmb();
-	/* Ring SW QP doorbell record. */
-	eqp->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci + cq_size);
-	return total;
+		rte_io_wmb();
+		/* Ring CQ doorbell record. */
+		cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
+		rte_io_wmb();
+		/* Ring SW QP doorbell record. */
+		eqp->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci + cq_size);
+	}
+	return comp;
 }
 
 static void
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v2 3/3] vdpa/mlx5: control completion queue event mode
  2020-06-25 13:30 ` [dpdk-dev] [PATCH v2 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
  2020-06-25 13:30   ` [dpdk-dev] [PATCH v2 1/3] vdpa/mlx5: optimize notification events Matan Azrad
  2020-06-25 13:30   ` [dpdk-dev] [PATCH v2 2/3] vdpa/mlx5: optimize completion queue poll Matan Azrad
@ 2020-06-25 13:30   ` Matan Azrad
  2020-06-29  9:16     ` Maxime Coquelin
  2020-06-29 14:01   ` [dpdk-dev] [PATCH v3 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
  3 siblings, 1 reply; 16+ messages in thread
From: Matan Azrad @ 2020-06-25 13:30 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev, Viacheslav Ovsiienko

The CQ polling is necessary in order to manage guest notifications when
the guest doesn't work with poll mode (callfd != -1).

The CQ polling scheduling method can affect the host CPU utilization and
the traffic bandwidth.

Define 3 modes to control the CQ polling scheduling:

1. A timer thread which automatically adjusts its delays to the coming
   traffic rate.
2. A timer thread with fixed delay time.
3. Interrupts: Each CQE burst arms the CQ in order to get an interrupt
   event in the next traffic burst.

When traffic stops, mode 3 is used automatically.

Interrupt management takes a lot of CPU cycles but forwards traffic
events to the guest very quickly.

The timer thread saves the interrupt overhead but may add delay to the guest
notification.

Add device arguments to control the mode.

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 doc/guides/vdpadevs/mlx5.rst        | 32 ++++++++++++++++++++
 drivers/vdpa/mlx5/mlx5_vdpa.c       | 58 +++++++++++++++++++++++++++++++++++++
 drivers/vdpa/mlx5/mlx5_vdpa.h       | 13 +++++++++
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 56 +++++++++++++++++++++++++----------
 4 files changed, 144 insertions(+), 15 deletions(-)

diff --git a/doc/guides/vdpadevs/mlx5.rst b/doc/guides/vdpadevs/mlx5.rst
index dd377af..ab62542 100644
--- a/doc/guides/vdpadevs/mlx5.rst
+++ b/doc/guides/vdpadevs/mlx5.rst
@@ -106,8 +106,40 @@ Run-time configuration
 
 - **ethtool** operations on related kernel interfaces also affect the PMD.
 
+Driver options
+^^^^^^^^^^^^^^
+
 - ``class`` parameter [string]
 
   Select the class of the driver that should probe the device.
   `vdpa` for the mlx5 vDPA driver.
 
+- ``event_mode`` parameter [int]
+
+  0: Completion queue scheduling will be managed by a timer thread which
+     automatically adjusts its delays to the coming traffic rate.
+  1: Completion queue scheduling will be managed by a timer thread with fixed
+     delay time.
+  2: Completion queue scheduling will be managed by interrupts.
+     Each CQ burst arms the CQ in order to get an interrupt event in the next
+     traffic burst.
+
+     Default mode is 0.
+
+- ``event_us`` parameter [int]
+
+  Per mode micro-seconds parameter:
+  0: A nonzero value to set timer step in micro-seconds.
+     The timer thread dynamic delay change steps according to this value.
+     Default value is 50us.
+  1: A nonzero value to set fixed timer delay in micro-seconds.
+     Default value is 500us.
+  Relevant only for event mode 0 and 1.
+
+- ``no_traffic_time`` parameter [int]
+
+  A nonzero value defines the traffic off time, in seconds, that moves the
+  driver to no-traffic mode. In this mode the timer events are stopped and
+  interrupts are configured to the device in order to notify traffic for the
+  driver.
+  Default value is 2s.
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index 8b0b3b8..3fb5e2c 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -43,6 +43,7 @@
 
 #define MLX5_VDPA_MAX_RETRIES 20
 #define MLX5_VDPA_USEC 1000
+#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S 2LLU
 
 TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
 					      TAILQ_HEAD_INITIALIZER(priv_list);
@@ -605,6 +606,62 @@
 	return -rte_errno;
 }
 
+static int
+mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
+{
+	struct mlx5_vdpa_priv *priv = opaque;
+	unsigned long tmp;
+
+	if (strcmp(key, "class") == 0)
+		return 0;
+	errno = 0;
+	tmp = strtoul(val, NULL, 0);
+	if (errno) {
+		DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
+		return -errno;
+	}
+	if (strcmp(key, "event_mode") == 0) {
+		if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
+			priv->event_mode = (int)tmp;
+		else
+			DRV_LOG(WARNING, "Invalid event_mode %s.", val);
+	} else if (strcmp(key, "event_us") == 0) {
+		priv->event_us = (uint32_t)tmp;
+	} else if (strcmp(key, "no_traffic_time") == 0) {
+		priv->no_traffic_time_s = (uint32_t)tmp;
+	} else {
+		DRV_LOG(WARNING, "Invalid key %s.", key);
+	}
+	return 0;
+}
+
+static void
+mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv)
+{
+	struct rte_kvargs *kvlist;
+
+	priv->event_mode = MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER;
+	priv->event_us = 0;
+	priv->no_traffic_time_s = MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S;
+	if (devargs == NULL)
+		return;
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (kvlist == NULL)
+		return;
+	rte_kvargs_process(kvlist, NULL, mlx5_vdpa_args_check_handler, priv);
+	rte_kvargs_free(kvlist);
+	if (!priv->event_us) {
+		if (priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER)
+			priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
+		else if (priv->event_mode == MLX5_VDPA_EVENT_MODE_FIXED_TIMER)
+			priv->event_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
+	}
+	priv->timer_delay_us = priv->event_us;
+	DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
+	DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
+	DRV_LOG(DEBUG, "no traffic time is %u s.", priv->no_traffic_time_s);
+}
+
 /**
  * DPDK callback to register a PCI device.
  *
@@ -694,6 +751,7 @@
 		rte_errno = rte_errno ? rte_errno : EINVAL;
 		goto error;
 	}
+	mlx5_vdpa_config_get(pci_dev->device.devargs, priv);
 	SLIST_INIT(&priv->mr_list);
 	pthread_mutex_lock(&priv_list_lock);
 	TAILQ_INSERT_TAIL(&priv_list, priv, next);
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index ae1dcd8..c0228b2 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -35,6 +35,9 @@
 #define VIRTIO_F_RING_PACKED 34
 #endif
 
+#define MLX5_VDPA_DEFAULT_TIMER_DELAY_US 500u
+#define MLX5_VDPA_DEFAULT_TIMER_STEP_US 50
+
 struct mlx5_vdpa_cq {
 	uint16_t log_desc_n;
 	uint32_t cq_ci:24;
@@ -100,16 +103,26 @@ struct mlx5_vdpa_steer {
 	} rss[7];
 };
 
+enum {
+	MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER,
+	MLX5_VDPA_EVENT_MODE_FIXED_TIMER,
+	MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT
+};
+
 struct mlx5_vdpa_priv {
 	TAILQ_ENTRY(mlx5_vdpa_priv) next;
 	uint8_t configured;
 	uint8_t direct_notifier; /* Whether direct notifier is on or off. */
 	uint64_t last_traffic_tic;
+	uint32_t last_total;
 	pthread_t timer_tid;
 	pthread_mutex_t timer_lock;
 	pthread_cond_t timer_cond;
 	volatile uint8_t timer_on;
+	int event_mode;
+	uint32_t event_us;
 	uint32_t timer_delay_us;
+	uint32_t no_traffic_time_s;
 	int id; /* vDPA device id. */
 	int vid; /* vhost device id. */
 	struct ibv_context *ctx; /* Device context. */
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index 25f11fd..7e1204f 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -20,9 +20,6 @@
 #include "mlx5_vdpa.h"
 
 
-#define MLX5_VDPA_DEFAULT_TIMER_DELAY_US 500u
-#define MLX5_VDPA_NO_TRAFFIC_TIME_S 2LLU
-
 void
 mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
 {
@@ -175,7 +172,8 @@
 		rte_errno = errno;
 		goto error;
 	}
-	if (callfd != -1) {
+	if (callfd != -1 &&
+	    priv->event_mode != MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
 		ret = mlx5_glue->devx_subscribe_devx_event_fd(priv->eventc,
 							      callfd,
 							      cq->cq->obj, 0);
@@ -253,6 +251,25 @@
 	}
 }
 
+static void
+mlx5_vdpa_timer_delay(struct mlx5_vdpa_priv *priv, uint32_t total)
+{
+	if (priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER) {
+		if (total == 0 || priv->last_total == 0) {
+			priv->timer_delay_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
+		} else {
+			if (total > priv->last_total) {
+				if (priv->timer_delay_us > priv->event_us)
+					priv->timer_delay_us -= priv->event_us;
+			} else {
+				priv->timer_delay_us += priv->event_us;
+			}
+		}
+		priv->last_total = total;
+	}
+	usleep(priv->timer_delay_us);
+}
+
 static void *
 mlx5_vdpa_poll_handle(void *arg)
 {
@@ -286,12 +303,13 @@
 		if (!total) {
 			/* No traffic ? stop timer and load interrupts. */
 			if (current_tic - priv->last_traffic_tic >=
-			    rte_get_timer_hz() * MLX5_VDPA_NO_TRAFFIC_TIME_S) {
+			    rte_get_timer_hz() * priv->no_traffic_time_s) {
 				DRV_LOG(DEBUG, "Device %d traffic was stopped.",
 					priv->id);
 				mlx5_vdpa_arm_all_cqs(priv);
 				pthread_mutex_lock(&priv->timer_lock);
 				priv->timer_on = 0;
+				priv->last_total = 0;
 				while (!priv->timer_on)
 					pthread_cond_wait(&priv->timer_cond,
 							  &priv->timer_lock);
@@ -301,7 +319,7 @@
 		} else {
 			priv->last_traffic_tic = current_tic;
 		}
-		usleep(priv->timer_delay_us);
+		mlx5_vdpa_timer_delay(priv, total);
 	}
 	return NULL;
 }
@@ -327,6 +345,13 @@
 						   struct mlx5_vdpa_virtq, eqp);
 
 		mlx5_vdpa_cq_poll(cq);
+		if (priv->event_mode == MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
+			mlx5_vdpa_cq_arm(priv, cq);
+			/* Notify guest for descs consuming. */
+			if (cq->callfd != -1)
+				eventfd_write(cq->callfd, (eventfd_t)1);
+			return;
+		}
 		/* Don't arm again - timer will take control. */
 		DRV_LOG(DEBUG, "Device %d virtq %d cq %d event was captured."
 			" Timer is %s, cq ci is %u.\n", priv->id,
@@ -355,15 +380,16 @@
 	if (!priv->eventc)
 		/* All virtqs are in poll mode. */
 		return 0;
-	pthread_mutex_init(&priv->timer_lock, NULL);
-	pthread_cond_init(&priv->timer_cond, NULL);
-	priv->timer_on = 0;
-	priv->timer_delay_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
-	ret = pthread_create(&priv->timer_tid, NULL, mlx5_vdpa_poll_handle,
-			     (void *)priv);
-	if (ret) {
-		DRV_LOG(ERR, "Failed to create timer thread.");
-		return -1;
+	if (priv->event_mode != MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
+		pthread_mutex_init(&priv->timer_lock, NULL);
+		pthread_cond_init(&priv->timer_cond, NULL);
+		priv->timer_on = 0;
+		ret = pthread_create(&priv->timer_tid, NULL,
+				     mlx5_vdpa_poll_handle, (void *)priv);
+		if (ret) {
+			DRV_LOG(ERR, "Failed to create timer thread.");
+			return -1;
+		}
 	}
 	flags = fcntl(priv->eventc->fd, F_GETFL);
 	ret = fcntl(priv->eventc->fd, F_SETFL, flags | O_NONBLOCK);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/3] vdpa/mlx5: optimize notification events
  2020-06-25 13:30   ` [dpdk-dev] [PATCH v2 1/3] vdpa/mlx5: optimize notification events Matan Azrad
@ 2020-06-29  9:05     ` Maxime Coquelin
  0 siblings, 0 replies; 16+ messages in thread
From: Maxime Coquelin @ 2020-06-29  9:05 UTC (permalink / raw)
  To: Matan Azrad; +Cc: dev, Viacheslav Ovsiienko



On 6/25/20 3:30 PM, Matan Azrad wrote:
> When the virtio guest driver doesn't work with poll mode, the driver
> creates event mechanism in order to schedule completion notifications
> for each virtq burst traffic.
> 
> When traffic comes to a virtq, a CQE will be added to the virtq CQ by
> the FW.
> The driver requests interrupt for the next CQE index, and when interrupt
> is triggered, the driver polls the CQ and notifies the guest by virtq
> callfd writing.
> 
> According to the described method, the interrupts will be triggered for
> each burst of traffic. The burst size depends on interrupt latency.
> 
> Interrupts management takes a lot of CPU cycles and using it for each
> traffic burst takes big portion of CPU capacity.
> 
> When traffic is on, using timer for CQ poll scheduling instead of
> interrupts saves a lot of CPU cycles.
> 
> Move CQ poll scheduling to be done by timer in case of running traffic.
> Request interrupts only when traffic is off.
> 
> The timer scheduling management is done by a new dedicated thread uses
> a usleep command.
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> ---
>  drivers/vdpa/mlx5/Makefile          |   1 +
>  drivers/vdpa/mlx5/mlx5_vdpa.h       |   7 ++
>  drivers/vdpa/mlx5/mlx5_vdpa_event.c | 175 ++++++++++++++++++++++++++++++------
>  3 files changed, 157 insertions(+), 26 deletions(-)
> 

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] vdpa/mlx5: optimize completion queue poll
  2020-06-25 13:30   ` [dpdk-dev] [PATCH v2 2/3] vdpa/mlx5: optimize completion queue poll Matan Azrad
@ 2020-06-29  9:11     ` Maxime Coquelin
  0 siblings, 0 replies; 16+ messages in thread
From: Maxime Coquelin @ 2020-06-29  9:11 UTC (permalink / raw)
  To: Matan Azrad; +Cc: dev, Viacheslav Ovsiienko



On 6/25/20 3:30 PM, Matan Azrad wrote:
> The vDPA driver uses a CQ in order to know when traffic works were
> completed by the HW.
> 
> Each traffic burst completion adds a CQE to the CQ.
> 
> When the vDPA driver detects CQEs in the CQ, it triggers the guest
> notification for the corresponding queue and consumes all of them.
> 
> There is collapse feature in the HW that configures the HW to write all the
> CQEs in the first entry of the CQ.
> 
> Using this feature, the vDPA driver can read only the first CQE,
> validate that the completion counter inside the CQE was changed and if
> so, to notify the guest.
> 
> Use CQ collapse feature in order to improve the poll utilization.
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> ---
>  drivers/vdpa/mlx5/mlx5_vdpa_event.c | 73 ++++++++++++++++++++-----------------
>  1 file changed, 40 insertions(+), 33 deletions(-)
> 
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH v2 3/3] vdpa/mlx5: control completion queue event mode
  2020-06-25 13:30   ` [dpdk-dev] [PATCH v2 3/3] vdpa/mlx5: control completion queue event mode Matan Azrad
@ 2020-06-29  9:16     ` Maxime Coquelin
  0 siblings, 0 replies; 16+ messages in thread
From: Maxime Coquelin @ 2020-06-29  9:16 UTC (permalink / raw)
  To: Matan Azrad; +Cc: dev, Viacheslav Ovsiienko



On 6/25/20 3:30 PM, Matan Azrad wrote:
> The CQ polling is necessary in order to manage guest notifications when
> the guest doesn't work with poll mode (callfd != -1).
> 
> The CQ polling scheduling method can affect the host CPU utilization and
> the traffic bandwidth.
> 
> Define 3 modes to control the CQ polling scheduling:
> 
> 1. A timer thread which automatically adjusts its delays to the coming
>    traffic rate.
> 2. A timer thread with fixed delay time.
> 3. Interrupts: Each CQE burst arms the CQ in order to get an interrupt
>    event in the next traffic burst.
> 
> When traffic becomes off, mode 3 is taken automatically.
> 
> The interrupt management takes a lot of CPU cycles but forward traffic
> event to the guest very fast.
> 
> Timer thread save the interrupt overhead but may add delay for the guest
> notification.
> 
> Add device arguments to control on the mode.
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> ---
>  doc/guides/vdpadevs/mlx5.rst        | 32 ++++++++++++++++++++
>  drivers/vdpa/mlx5/mlx5_vdpa.c       | 58 +++++++++++++++++++++++++++++++++++++
>  drivers/vdpa/mlx5/mlx5_vdpa.h       | 13 +++++++++
>  drivers/vdpa/mlx5/mlx5_vdpa_event.c | 56 +++++++++++++++++++++++++----------
>  4 files changed, 144 insertions(+), 15 deletions(-)

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v3 0/3] vdpa/mlx5: optimize cpu utilization
  2020-06-25 13:30 ` [dpdk-dev] [PATCH v2 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
                     ` (2 preceding siblings ...)
  2020-06-25 13:30   ` [dpdk-dev] [PATCH v2 3/3] vdpa/mlx5: control completion queue event mode Matan Azrad
@ 2020-06-29 14:01   ` Matan Azrad
  2020-06-29 14:01     ` [dpdk-dev] [PATCH v3 1/3] vdpa/mlx5: optimize notification events Matan Azrad
                       ` (3 more replies)
  3 siblings, 4 replies; 16+ messages in thread
From: Matan Azrad @ 2020-06-29 14:01 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev, Viacheslav Ovsiienko

Defines new event modes in order to schedule CQ polling.

A timer thread is created in modes 0 and 1 in order to reduce CPU utilization.

v2:
Add devargs to control the events mode.

v3:
rebase.
Improve mode 0 latency.

Matan Azrad (3):
  vdpa/mlx5: optimize notification events
  vdpa/mlx5: optimize completion queue poll
  vdpa/mlx5: control completion queue event mode

 doc/guides/vdpadevs/mlx5.rst        |  31 +++++
 drivers/vdpa/mlx5/Makefile          |   1 +
 drivers/vdpa/mlx5/mlx5_vdpa.c       |  57 ++++++++
 drivers/vdpa/mlx5/mlx5_vdpa.h       |  19 +++
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 257 +++++++++++++++++++++++++++++-------
 5 files changed, 318 insertions(+), 47 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v3 1/3] vdpa/mlx5: optimize notification events
  2020-06-29 14:01   ` [dpdk-dev] [PATCH v3 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
@ 2020-06-29 14:01     ` Matan Azrad
  2020-06-29 14:01     ` [dpdk-dev] [PATCH v3 2/3] vdpa/mlx5: optimize completion queue poll Matan Azrad
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 16+ messages in thread
From: Matan Azrad @ 2020-06-29 14:01 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev, Viacheslav Ovsiienko

When the virtio guest driver doesn't work with poll mode, the driver
creates event mechanism in order to schedule completion notifications
for each virtq burst traffic.

When traffic comes to a virtq, a CQE will be added to the virtq CQ by
the FW.
The driver requests interrupt for the next CQE index, and when interrupt
is triggered, the driver polls the CQ and notifies the guest by virtq
callfd writing.

According to the described method, the interrupts will be triggered for
each burst of traffic. The burst size depends on interrupt latency.

Interrupts management takes a lot of CPU cycles and using it for each
traffic burst takes big portion of CPU capacity.

When traffic is on, using timer for CQ poll scheduling instead of
interrupts saves a lot of CPU cycles.

Move CQ poll scheduling to be done by timer in case of running traffic.
Request interrupts only when traffic is off.

The timer scheduling management is done by a new dedicated thread that uses
a usleep command.

Signed-off-by: Matan Azrad <matan@mellanox.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/mlx5/Makefile          |   1 +
 drivers/vdpa/mlx5/mlx5_vdpa.h       |   7 ++
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 175 ++++++++++++++++++++++++++++++------
 3 files changed, 157 insertions(+), 26 deletions(-)

diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile
index 91c89d6..8a1c2ea 100644
--- a/drivers/vdpa/mlx5/Makefile
+++ b/drivers/vdpa/mlx5/Makefile
@@ -31,6 +31,7 @@ CFLAGS += $(WERROR_FLAGS)
 CFLAGS += -Wno-strict-prototypes
 LDLIBS += -lrte_common_mlx5
 LDLIBS += -lrte_eal -lrte_vhost -lrte_kvargs -lrte_pci -lrte_bus_pci -lrte_sched
+LDLIBS += -pthread
 
 # A few warnings cannot be avoided in external headers.
 CFLAGS += -Wno-error=cast-qual
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index 80b4c4b..ae1dcd8 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -39,6 +39,7 @@ struct mlx5_vdpa_cq {
 	uint16_t log_desc_n;
 	uint32_t cq_ci:24;
 	uint32_t arm_sn:2;
+	uint32_t armed:1;
 	int callfd;
 	rte_spinlock_t sl;
 	struct mlx5_devx_obj *cq;
@@ -103,6 +104,12 @@ struct mlx5_vdpa_priv {
 	TAILQ_ENTRY(mlx5_vdpa_priv) next;
 	uint8_t configured;
 	uint8_t direct_notifier; /* Whether direct notifier is on or off. */
+	uint64_t last_traffic_tic;
+	pthread_t timer_tid;
+	pthread_mutex_t timer_lock;
+	pthread_cond_t timer_cond;
+	volatile uint8_t timer_on;
+	uint32_t timer_delay_us;
 	int id; /* vDPA device id. */
 	int vid; /* vhost device id. */
 	struct ibv_context *ctx; /* Device context. */
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index dd60150..69c8bf6 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -12,6 +12,7 @@
 #include <rte_atomic.h>
 #include <rte_common.h>
 #include <rte_io.h>
+#include <rte_alarm.h>
 
 #include <mlx5_common.h>
 
@@ -19,6 +20,9 @@
 #include "mlx5_vdpa.h"
 
 
+#define MLX5_VDPA_DEFAULT_TIMER_DELAY_US 500u
+#define MLX5_VDPA_NO_TRAFFIC_TIME_S 2LLU
+
 void
 mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
 {
@@ -26,10 +30,23 @@
 		mlx5_glue->devx_free_uar(priv->uar);
 		priv->uar = NULL;
 	}
+#ifdef HAVE_IBV_DEVX_EVENT
 	if (priv->eventc) {
+		union {
+			struct mlx5dv_devx_async_event_hdr event_resp;
+			uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr)
+									 + 128];
+		} out;
+
+		/* Clean all pending events. */
+		while (mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
+		       sizeof(out.buf)) >=
+		       (ssize_t)sizeof(out.event_resp.cookie))
+			;
 		mlx5_glue->devx_destroy_event_channel(priv->eventc);
 		priv->eventc = NULL;
 	}
+#endif
 	priv->eqn = 0;
 }
 
@@ -79,7 +96,7 @@
 	memset(cq, 0, sizeof(*cq));
 }
 
-static inline void
+static inline void __rte_unused
 mlx5_vdpa_cq_arm(struct mlx5_vdpa_priv *priv, struct mlx5_vdpa_cq *cq)
 {
 	uint32_t arm_sn = cq->arm_sn << MLX5_CQ_SQN_OFFSET;
@@ -100,6 +117,7 @@
 	*((uint32_t *)addr + 1) = db_be >> 32;
 #endif
 	cq->arm_sn++;
+	cq->armed = 1;
 }
 
 static int
@@ -157,6 +175,16 @@
 		rte_errno = errno;
 		goto error;
 	}
+	if (callfd != -1) {
+		ret = mlx5_glue->devx_subscribe_devx_event_fd(priv->eventc,
+							      callfd,
+							      cq->cq->obj, 0);
+		if (ret) {
+			DRV_LOG(ERR, "Failed to subscribe CQE event fd.");
+			rte_errno = errno;
+			goto error;
+		}
+	}
 	cq->callfd = callfd;
 	/* Init CQ to ones to be in HW owner in the start. */
 	memset((void *)(uintptr_t)cq->umem_buf, 0xFF, attr.db_umem_offset);
@@ -168,27 +196,27 @@
 	return -1;
 }
 
-static inline void __rte_unused
-mlx5_vdpa_cq_poll(struct mlx5_vdpa_priv *priv __rte_unused,
-		  struct mlx5_vdpa_cq *cq)
+static inline uint32_t
+mlx5_vdpa_cq_poll(struct mlx5_vdpa_cq *cq)
 {
 	struct mlx5_vdpa_event_qp *eqp =
 				container_of(cq, struct mlx5_vdpa_event_qp, cq);
 	const unsigned int cq_size = 1 << cq->log_desc_n;
 	const unsigned int cq_mask = cq_size - 1;
+	uint32_t total = 0;
 	int ret;
 
 	do {
-		volatile struct mlx5_cqe *cqe = cq->cqes + (cq->cq_ci &
-							    cq_mask);
+		volatile struct mlx5_cqe *cqe = cq->cqes + ((cq->cq_ci + total)
+							    & cq_mask);
 
-		ret = check_cqe(cqe, cq_size, cq->cq_ci);
+		ret = check_cqe(cqe, cq_size, cq->cq_ci + total);
 		switch (ret) {
 		case MLX5_CQE_STATUS_ERR:
 			cq->errors++;
 			/*fall-through*/
 		case MLX5_CQE_STATUS_SW_OWN:
-			cq->cq_ci++;
+			total++;
 			break;
 		case MLX5_CQE_STATUS_HW_OWN:
 		default:
@@ -196,21 +224,86 @@
 		}
 	} while (ret != MLX5_CQE_STATUS_HW_OWN);
 	rte_io_wmb();
+	cq->cq_ci += total;
 	/* Ring CQ doorbell record. */
 	cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
 	rte_io_wmb();
 	/* Ring SW QP doorbell record. */
 	eqp->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci + cq_size);
+	return total;
+}
+
+static void
+mlx5_vdpa_arm_all_cqs(struct mlx5_vdpa_priv *priv)
+{
+	struct mlx5_vdpa_cq *cq;
+	int i;
+
+	for (i = 0; i < priv->nr_virtqs; i++) {
+		cq = &priv->virtqs[i].eqp.cq;
+		if (cq->cq && !cq->armed)
+			mlx5_vdpa_cq_arm(priv, cq);
+	}
+}
+
+static void *
+mlx5_vdpa_poll_handle(void *arg)
+{
+	struct mlx5_vdpa_priv *priv = arg;
+	int i;
+	struct mlx5_vdpa_cq *cq;
+	uint32_t total;
+	uint64_t current_tic;
+
+	pthread_mutex_lock(&priv->timer_lock);
+	while (!priv->timer_on)
+		pthread_cond_wait(&priv->timer_cond, &priv->timer_lock);
+	pthread_mutex_unlock(&priv->timer_lock);
+	while (1) {
+		total = 0;
+		for (i = 0; i < priv->nr_virtqs; i++) {
+			cq = &priv->virtqs[i].eqp.cq;
+			if (cq->cq && !cq->armed) {
+				uint32_t comp = mlx5_vdpa_cq_poll(cq);
+
+				if (comp) {
+					/* Notify guest for descs consuming. */
+					if (cq->callfd != -1)
+						eventfd_write(cq->callfd,
+							      (eventfd_t)1);
+					total += comp;
+				}
+			}
+		}
+		current_tic = rte_rdtsc();
+		if (!total) {
+			/* No traffic ? stop timer and load interrupts. */
+			if (current_tic - priv->last_traffic_tic >=
+			    rte_get_timer_hz() * MLX5_VDPA_NO_TRAFFIC_TIME_S) {
+				DRV_LOG(DEBUG, "Device %d traffic was stopped.",
+					priv->id);
+				mlx5_vdpa_arm_all_cqs(priv);
+				pthread_mutex_lock(&priv->timer_lock);
+				priv->timer_on = 0;
+				while (!priv->timer_on)
+					pthread_cond_wait(&priv->timer_cond,
+							  &priv->timer_lock);
+				pthread_mutex_unlock(&priv->timer_lock);
+				continue;
+			}
+		} else {
+			priv->last_traffic_tic = current_tic;
+		}
+		usleep(priv->timer_delay_us);
+	}
+	return NULL;
 }
 
 static void
 mlx5_vdpa_interrupt_handler(void *cb_arg)
 {
-#ifndef HAVE_IBV_DEVX_EVENT
-	(void)cb_arg;
-	return;
-#else
 	struct mlx5_vdpa_priv *priv = cb_arg;
+#ifdef HAVE_IBV_DEVX_EVENT
 	union {
 		struct mlx5dv_devx_async_event_hdr event_resp;
 		uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
@@ -221,17 +314,29 @@
 				       (ssize_t)sizeof(out.event_resp.cookie)) {
 		struct mlx5_vdpa_cq *cq = (struct mlx5_vdpa_cq *)
 					       (uintptr_t)out.event_resp.cookie;
-		rte_spinlock_lock(&cq->sl);
-		mlx5_vdpa_cq_poll(priv, cq);
-		mlx5_vdpa_cq_arm(priv, cq);
-		if (cq->callfd != -1)
-			/* Notify guest for descriptors consuming. */
-			eventfd_write(cq->callfd, (eventfd_t)1);
-		rte_spinlock_unlock(&cq->sl);
-		DRV_LOG(DEBUG, "CQ %d event: new cq_ci = %u.", cq->cq->id,
-			cq->cq_ci);
+		struct mlx5_vdpa_event_qp *eqp = container_of(cq,
+						 struct mlx5_vdpa_event_qp, cq);
+		struct mlx5_vdpa_virtq *virtq = container_of(eqp,
+						   struct mlx5_vdpa_virtq, eqp);
+
+		mlx5_vdpa_cq_poll(cq);
+		/* Don't arm again - timer will take control. */
+		DRV_LOG(DEBUG, "Device %d virtq %d cq %d event was captured."
+			" Timer is %s, cq ci is %u.\n", priv->id,
+			(int)virtq->index, cq->cq->id, priv->timer_on ? "on" :
+			"off", cq->cq_ci);
+		cq->armed = 0;
+	}
+#endif
+
+	/* Traffic detected: make sure timer is on. */
+	priv->last_traffic_tic = rte_rdtsc();
+	pthread_mutex_lock(&priv->timer_lock);
+	if (!priv->timer_on) {
+		priv->timer_on = 1;
+		pthread_cond_signal(&priv->timer_cond);
 	}
-#endif /* HAVE_IBV_DEVX_ASYNC */
+	pthread_mutex_unlock(&priv->timer_lock);
 }
 
 int
@@ -243,12 +348,21 @@
 	if (!priv->eventc)
 		/* All virtqs are in poll mode. */
 		return 0;
+	pthread_mutex_init(&priv->timer_lock, NULL);
+	pthread_cond_init(&priv->timer_cond, NULL);
+	priv->timer_on = 0;
+	priv->timer_delay_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
+	ret = pthread_create(&priv->timer_tid, NULL, mlx5_vdpa_poll_handle,
+			     (void *)priv);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to create timer thread.");
+		return -1;
+	}
 	flags = fcntl(priv->eventc->fd, F_GETFL);
 	ret = fcntl(priv->eventc->fd, F_SETFL, flags | O_NONBLOCK);
 	if (ret) {
 		DRV_LOG(ERR, "Failed to change event channel FD.");
-		rte_errno = errno;
-		return -rte_errno;
+		goto error;
 	}
 	priv->intr_handle.fd = priv->eventc->fd;
 	priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
@@ -256,9 +370,12 @@
 				       mlx5_vdpa_interrupt_handler, priv)) {
 		priv->intr_handle.fd = 0;
 		DRV_LOG(ERR, "Failed to register CQE interrupt %d.", rte_errno);
-		return -rte_errno;
+		goto error;
 	}
 	return 0;
+error:
+	mlx5_vdpa_cqe_event_unset(priv);
+	return -1;
 }
 
 void
@@ -266,6 +383,7 @@
 {
 	int retries = MLX5_VDPA_INTR_RETRIES;
 	int ret = -EAGAIN;
+	void *status;
 
 	if (priv->intr_handle.fd) {
 		while (retries-- && ret == -EAGAIN) {
@@ -276,11 +394,16 @@
 				DRV_LOG(DEBUG, "Try again to unregister fd %d "
 					"of CQ interrupt, retries = %d.",
 					priv->intr_handle.fd, retries);
-				usleep(MLX5_VDPA_INTR_RETRIES_USEC);
+				rte_pause();
 			}
 		}
 		memset(&priv->intr_handle, 0, sizeof(priv->intr_handle));
 	}
+	if (priv->timer_tid) {
+		pthread_cancel(priv->timer_tid);
+		pthread_join(priv->timer_tid, &status);
+	}
+	priv->timer_tid = 0;
 }
 
 void
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v3 2/3] vdpa/mlx5: optimize completion queue poll
  2020-06-29 14:01   ` [dpdk-dev] [PATCH v3 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
  2020-06-29 14:01     ` [dpdk-dev] [PATCH v3 1/3] vdpa/mlx5: optimize notification events Matan Azrad
@ 2020-06-29 14:01     ` Matan Azrad
  2020-06-29 14:01     ` [dpdk-dev] [PATCH v3 3/3] vdpa/mlx5: control completion queue event mode Matan Azrad
  2020-06-29 17:24     ` [dpdk-dev] [PATCH v3 0/3] vdpa/mlx5: optimize cpu utilization Maxime Coquelin
  3 siblings, 0 replies; 16+ messages in thread
From: Matan Azrad @ 2020-06-29 14:01 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev, Viacheslav Ovsiienko

The vDPA driver uses a CQ in order to know when traffic works were
completed by the HW.

Each traffic burst completion adds a CQE to the CQ.

When the vDPA driver detects CQEs in the CQ, it triggers the guest
notification for the corresponding queue and consumes all of them.

There is a collapse feature in the HW that configures the HW to write all
the CQEs in the first entry of the CQ.

Using this feature, the vDPA driver can read only the first CQE,
validate that the completion counter inside the CQE was changed and, if
so, notify the guest.

Use the CQ collapse feature in order to improve the poll utilization.

Signed-off-by: Matan Azrad <matan@mellanox.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 73 ++++++++++++++++++++-----------------
 1 file changed, 40 insertions(+), 33 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index 69c8bf6..25f11fd 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -127,12 +127,12 @@
 	struct mlx5_devx_cq_attr attr;
 	size_t pgsize = sysconf(_SC_PAGESIZE);
 	uint32_t umem_size;
-	int ret;
 	uint16_t event_nums[1] = {0};
+	uint16_t cq_size = 1 << log_desc_n;
+	int ret;
 
 	cq->log_desc_n = log_desc_n;
-	umem_size = sizeof(struct mlx5_cqe) * (1 << log_desc_n) +
-							sizeof(*cq->db_rec) * 2;
+	umem_size = sizeof(struct mlx5_cqe) * cq_size + sizeof(*cq->db_rec) * 2;
 	cq->umem_buf = rte_zmalloc(__func__, umem_size, 4096);
 	if (!cq->umem_buf) {
 		DRV_LOG(ERR, "Failed to allocate memory for CQ.");
@@ -149,13 +149,13 @@
 	}
 	attr.q_umem_valid = 1;
 	attr.db_umem_valid = 1;
-	attr.use_first_only = 0;
+	attr.use_first_only = 1;
 	attr.overrun_ignore = 0;
 	attr.uar_page_id = priv->uar->page_id;
 	attr.q_umem_id = cq->umem_obj->umem_id;
 	attr.q_umem_offset = 0;
 	attr.db_umem_id = cq->umem_obj->umem_id;
-	attr.db_umem_offset = sizeof(struct mlx5_cqe) * (1 << log_desc_n);
+	attr.db_umem_offset = sizeof(struct mlx5_cqe) * cq_size;
 	attr.eqn = priv->eqn;
 	attr.log_cq_size = log_desc_n;
 	attr.log_page_size = rte_log2_u32(pgsize);
@@ -187,7 +187,8 @@
 	}
 	cq->callfd = callfd;
 	/* Init CQ to ones to be in HW owner in the start. */
-	memset((void *)(uintptr_t)cq->umem_buf, 0xFF, attr.db_umem_offset);
+	cq->cqes[0].op_own = MLX5_CQE_OWNER_MASK;
+	cq->cqes[0].wqe_counter = rte_cpu_to_be_16(cq_size - 1);
 	/* First arming. */
 	mlx5_vdpa_cq_arm(priv, cq);
 	return 0;
@@ -203,34 +204,40 @@
 				container_of(cq, struct mlx5_vdpa_event_qp, cq);
 	const unsigned int cq_size = 1 << cq->log_desc_n;
 	const unsigned int cq_mask = cq_size - 1;
-	uint32_t total = 0;
-	int ret;
-
-	do {
-		volatile struct mlx5_cqe *cqe = cq->cqes + ((cq->cq_ci + total)
-							    & cq_mask);
-
-		ret = check_cqe(cqe, cq_size, cq->cq_ci + total);
-		switch (ret) {
-		case MLX5_CQE_STATUS_ERR:
+	union {
+		struct {
+			uint16_t wqe_counter;
+			uint8_t rsvd5;
+			uint8_t op_own;
+		};
+		uint32_t word;
+	} last_word;
+	uint16_t next_wqe_counter = cq->cq_ci & cq_mask;
+	uint16_t cur_wqe_counter;
+	uint16_t comp;
+
+	last_word.word = rte_read32(&cq->cqes[0].wqe_counter);
+	cur_wqe_counter = rte_be_to_cpu_16(last_word.wqe_counter);
+	comp = (cur_wqe_counter + 1u - next_wqe_counter) & cq_mask;
+	if (comp) {
+		cq->cq_ci += comp;
+		MLX5_ASSERT(!!(cq->cq_ci & cq_size) ==
+			    MLX5_CQE_OWNER(last_word.op_own));
+		MLX5_ASSERT(MLX5_CQE_OPCODE(last_word.op_own) !=
+			    MLX5_CQE_INVALID);
+		if (unlikely(!(MLX5_CQE_OPCODE(last_word.op_own) ==
+			       MLX5_CQE_RESP_ERR ||
+			       MLX5_CQE_OPCODE(last_word.op_own) ==
+			       MLX5_CQE_REQ_ERR)))
 			cq->errors++;
-			/*fall-through*/
-		case MLX5_CQE_STATUS_SW_OWN:
-			total++;
-			break;
-		case MLX5_CQE_STATUS_HW_OWN:
-		default:
-			break;
-		}
-	} while (ret != MLX5_CQE_STATUS_HW_OWN);
-	rte_io_wmb();
-	cq->cq_ci += total;
-	/* Ring CQ doorbell record. */
-	cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
-	rte_io_wmb();
-	/* Ring SW QP doorbell record. */
-	eqp->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci + cq_size);
-	return total;
+		rte_io_wmb();
+		/* Ring CQ doorbell record. */
+		cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
+		rte_io_wmb();
+		/* Ring SW QP doorbell record. */
+		eqp->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci + cq_size);
+	}
+	return comp;
 }
 
 static void
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v3 3/3] vdpa/mlx5: control completion queue event mode
  2020-06-29 14:01   ` [dpdk-dev] [PATCH v3 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
  2020-06-29 14:01     ` [dpdk-dev] [PATCH v3 1/3] vdpa/mlx5: optimize notification events Matan Azrad
  2020-06-29 14:01     ` [dpdk-dev] [PATCH v3 2/3] vdpa/mlx5: optimize completion queue poll Matan Azrad
@ 2020-06-29 14:01     ` Matan Azrad
  2020-06-29 17:24     ` [dpdk-dev] [PATCH v3 0/3] vdpa/mlx5: optimize cpu utilization Maxime Coquelin
  3 siblings, 0 replies; 16+ messages in thread
From: Matan Azrad @ 2020-06-29 14:01 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev, Viacheslav Ovsiienko

The CQ polling is necessary in order to manage guest notifications when
the guest doesn't work with poll mode (callfd != -1).

The CQ polling scheduling method can affect the host CPU utilization and
the traffic bandwidth.

Define 3 modes to control the CQ polling scheduling:

1. A timer thread which automatically adjusts its delays to the coming
   traffic rate.
2. A timer thread with fixed delay time.
3. Interrupts: Each CQE burst arms the CQ in order to get an interrupt
   event in the next traffic burst.

When traffic becomes off, mode 3 is taken automatically.

The interrupt management takes a lot of CPU cycles but forwards traffic
events to the guest very fast.

The timer thread saves the interrupt overhead but may add delay to the
guest notification.

Add device arguments to control the mode.

Signed-off-by: Matan Azrad <matan@mellanox.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 doc/guides/vdpadevs/mlx5.rst        | 31 ++++++++++++++++
 drivers/vdpa/mlx5/mlx5_vdpa.c       | 57 +++++++++++++++++++++++++++++
 drivers/vdpa/mlx5/mlx5_vdpa.h       | 12 +++++++
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 71 +++++++++++++++++++++++++++----------
 4 files changed, 152 insertions(+), 19 deletions(-)

diff --git a/doc/guides/vdpadevs/mlx5.rst b/doc/guides/vdpadevs/mlx5.rst
index dd377af..faa8182 100644
--- a/doc/guides/vdpadevs/mlx5.rst
+++ b/doc/guides/vdpadevs/mlx5.rst
@@ -106,8 +106,39 @@ Run-time configuration
 
 - **ethtool** operations on related kernel interfaces also affect the PMD.
 
+Driver options
+^^^^^^^^^^^^^^
+
 - ``class`` parameter [string]
 
   Select the class of the driver that should probe the device.
   `vdpa` for the mlx5 vDPA driver.
 
+- ``event_mode`` parameter [int]
+
+  - 0, Completion queue scheduling will be managed by a timer thread which
+    automatically adjusts its delays to the coming traffic rate.
+
+  - 1, Completion queue scheduling will be managed by a timer thread with fixed
+    delay time.
+
+  - 2, Completion queue scheduling will be managed by interrupts. Each CQ burst
+    arms the CQ in order to get an interrupt event in the next traffic burst.
+
+  - Default mode is 0.
+
+- ``event_us`` parameter [int]
+
+  Per mode micro-seconds parameter - relevant only for event mode 0 and 1:
+  - 0, A nonzero value to set timer step in micro-seconds. The timer thread
+    dynamic delay change steps according to this value. Default value is 1us.
+
+  - 1, A nonzero value to set fixed timer delay in micro-seconds. Default value
+    is 100us.
+
+- ``no_traffic_time`` parameter [int]
+
+  A nonzero value defines the traffic off time, in seconds, that moves the
+  driver to no-traffic mode. In this mode the timer events are stopped and
+  interrupts are configured to the device in order to notify traffic for the
+  driver. Default value is 2s.
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index 8b0b3b8..159653f 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -43,6 +43,7 @@
 
 #define MLX5_VDPA_MAX_RETRIES 20
 #define MLX5_VDPA_USEC 1000
+#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S 2LLU
 
 TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
 					      TAILQ_HEAD_INITIALIZER(priv_list);
@@ -605,6 +606,61 @@
 	return -rte_errno;
 }
 
+static int
+mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
+{
+	struct mlx5_vdpa_priv *priv = opaque;
+	unsigned long tmp;
+
+	if (strcmp(key, "class") == 0)
+		return 0;
+	errno = 0;
+	tmp = strtoul(val, NULL, 0);
+	if (errno) {
+		DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
+		return -errno;
+	}
+	if (strcmp(key, "event_mode") == 0) {
+		if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
+			priv->event_mode = (int)tmp;
+		else
+			DRV_LOG(WARNING, "Invalid event_mode %s.", val);
+	} else if (strcmp(key, "event_us") == 0) {
+		priv->event_us = (uint32_t)tmp;
+	} else if (strcmp(key, "no_traffic_time") == 0) {
+		priv->no_traffic_time_s = (uint32_t)tmp;
+	} else {
+		DRV_LOG(WARNING, "Invalid key %s.", key);
+	}
+	return 0;
+}
+
+static void
+mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv)
+{
+	struct rte_kvargs *kvlist;
+
+	priv->event_mode = MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER;
+	priv->event_us = 0;
+	priv->no_traffic_time_s = MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S;
+	if (devargs == NULL)
+		return;
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (kvlist == NULL)
+		return;
+	rte_kvargs_process(kvlist, NULL, mlx5_vdpa_args_check_handler, priv);
+	rte_kvargs_free(kvlist);
+	if (!priv->event_us) {
+		if (priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER)
+			priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
+		else if (priv->event_mode == MLX5_VDPA_EVENT_MODE_FIXED_TIMER)
+			priv->event_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
+	}
+	DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
+	DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
+	DRV_LOG(DEBUG, "no traffic time is %u s.", priv->no_traffic_time_s);
+}
+
 /**
  * DPDK callback to register a PCI device.
  *
@@ -694,6 +750,7 @@
 		rte_errno = rte_errno ? rte_errno : EINVAL;
 		goto error;
 	}
+	mlx5_vdpa_config_get(pci_dev->device.devargs, priv);
 	SLIST_INIT(&priv->mr_list);
 	pthread_mutex_lock(&priv_list_lock);
 	TAILQ_INSERT_TAIL(&priv_list, priv, next);
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index ae1dcd8..2ee5aae 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -35,6 +35,9 @@
 #define VIRTIO_F_RING_PACKED 34
 #endif
 
+#define MLX5_VDPA_DEFAULT_TIMER_DELAY_US 100u
+#define MLX5_VDPA_DEFAULT_TIMER_STEP_US 1u
+
 struct mlx5_vdpa_cq {
 	uint16_t log_desc_n;
 	uint32_t cq_ci:24;
@@ -100,6 +103,12 @@ struct mlx5_vdpa_steer {
 	} rss[7];
 };
 
+enum {
+	MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER,
+	MLX5_VDPA_EVENT_MODE_FIXED_TIMER,
+	MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT
+};
+
 struct mlx5_vdpa_priv {
 	TAILQ_ENTRY(mlx5_vdpa_priv) next;
 	uint8_t configured;
@@ -109,7 +118,10 @@ struct mlx5_vdpa_priv {
 	pthread_mutex_t timer_lock;
 	pthread_cond_t timer_cond;
 	volatile uint8_t timer_on;
+	int event_mode;
+	uint32_t event_us;
 	uint32_t timer_delay_us;
+	uint32_t no_traffic_time_s;
 	int id; /* vDPA device id. */
 	int vid; /* vhost device id. */
 	struct ibv_context *ctx; /* Device context. */
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index 25f11fd..ac61a41 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -20,9 +20,6 @@
 #include "mlx5_vdpa.h"
 
 
-#define MLX5_VDPA_DEFAULT_TIMER_DELAY_US 500u
-#define MLX5_VDPA_NO_TRAFFIC_TIME_S 2LLU
-
 void
 mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
 {
@@ -175,7 +172,8 @@
 		rte_errno = errno;
 		goto error;
 	}
-	if (callfd != -1) {
+	if (callfd != -1 &&
+	    priv->event_mode != MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
 		ret = mlx5_glue->devx_subscribe_devx_event_fd(priv->eventc,
 							      callfd,
 							      cq->cq->obj, 0);
@@ -253,21 +251,43 @@
 	}
 }
 
+static void
+mlx5_vdpa_timer_sleep(struct mlx5_vdpa_priv *priv, uint32_t max)
+{
+	if (priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER) {
+		switch (max) {
+		case 0:
+			priv->timer_delay_us += priv->event_us;
+			break;
+		case 1:
+			break;
+		default:
+			priv->timer_delay_us /= max;
+			break;
+		}
+	}
+	usleep(priv->timer_delay_us);
+}
+
 static void *
 mlx5_vdpa_poll_handle(void *arg)
 {
 	struct mlx5_vdpa_priv *priv = arg;
 	int i;
 	struct mlx5_vdpa_cq *cq;
-	uint32_t total;
+	uint32_t max;
 	uint64_t current_tic;
 
 	pthread_mutex_lock(&priv->timer_lock);
 	while (!priv->timer_on)
 		pthread_cond_wait(&priv->timer_cond, &priv->timer_lock);
 	pthread_mutex_unlock(&priv->timer_lock);
+	priv->timer_delay_us = priv->event_mode ==
+					    MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER ?
+					      MLX5_VDPA_DEFAULT_TIMER_DELAY_US :
+								 priv->event_us;
 	while (1) {
-		total = 0;
+		max = 0;
 		for (i = 0; i < priv->nr_virtqs; i++) {
 			cq = &priv->virtqs[i].eqp.cq;
 			if (cq->cq && !cq->armed) {
@@ -278,15 +298,16 @@
 					if (cq->callfd != -1)
 						eventfd_write(cq->callfd,
 							      (eventfd_t)1);
-					total += comp;
+					if (comp > max)
+						max = comp;
 				}
 			}
 		}
 		current_tic = rte_rdtsc();
-		if (!total) {
+		if (!max) {
 			/* No traffic ? stop timer and load interrupts. */
 			if (current_tic - priv->last_traffic_tic >=
-			    rte_get_timer_hz() * MLX5_VDPA_NO_TRAFFIC_TIME_S) {
+			    rte_get_timer_hz() * priv->no_traffic_time_s) {
 				DRV_LOG(DEBUG, "Device %d traffic was stopped.",
 					priv->id);
 				mlx5_vdpa_arm_all_cqs(priv);
@@ -296,12 +317,16 @@
 					pthread_cond_wait(&priv->timer_cond,
 							  &priv->timer_lock);
 				pthread_mutex_unlock(&priv->timer_lock);
+				priv->timer_delay_us = priv->event_mode ==
+					    MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER ?
+					      MLX5_VDPA_DEFAULT_TIMER_DELAY_US :
+								 priv->event_us;
 				continue;
 			}
 		} else {
 			priv->last_traffic_tic = current_tic;
 		}
-		usleep(priv->timer_delay_us);
+		mlx5_vdpa_timer_sleep(priv, max);
 	}
 	return NULL;
 }
@@ -327,6 +352,13 @@
 						   struct mlx5_vdpa_virtq, eqp);
 
 		mlx5_vdpa_cq_poll(cq);
+		if (priv->event_mode == MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
+			mlx5_vdpa_cq_arm(priv, cq);
+			/* Notify guest for descs consuming. */
+			if (cq->callfd != -1)
+				eventfd_write(cq->callfd, (eventfd_t)1);
+			return;
+		}
 		/* Don't arm again - timer will take control. */
 		DRV_LOG(DEBUG, "Device %d virtq %d cq %d event was captured."
 			" Timer is %s, cq ci is %u.\n", priv->id,
@@ -355,15 +387,16 @@
 	if (!priv->eventc)
 		/* All virtqs are in poll mode. */
 		return 0;
-	pthread_mutex_init(&priv->timer_lock, NULL);
-	pthread_cond_init(&priv->timer_cond, NULL);
-	priv->timer_on = 0;
-	priv->timer_delay_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
-	ret = pthread_create(&priv->timer_tid, NULL, mlx5_vdpa_poll_handle,
-			     (void *)priv);
-	if (ret) {
-		DRV_LOG(ERR, "Failed to create timer thread.");
-		return -1;
+	if (priv->event_mode != MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
+		pthread_mutex_init(&priv->timer_lock, NULL);
+		pthread_cond_init(&priv->timer_cond, NULL);
+		priv->timer_on = 0;
+		ret = pthread_create(&priv->timer_tid, NULL,
+				     mlx5_vdpa_poll_handle, (void *)priv);
+		if (ret) {
+			DRV_LOG(ERR, "Failed to create timer thread.");
+			return -1;
+		}
 	}
 	flags = fcntl(priv->eventc->fd, F_GETFL);
 	ret = fcntl(priv->eventc->fd, F_SETFL, flags | O_NONBLOCK);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/3] vdpa/mlx5: optimize cpu utilization
  2020-06-29 14:01   ` [dpdk-dev] [PATCH v3 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
                       ` (2 preceding siblings ...)
  2020-06-29 14:01     ` [dpdk-dev] [PATCH v3 3/3] vdpa/mlx5: control completion queue event mode Matan Azrad
@ 2020-06-29 17:24     ` Maxime Coquelin
  3 siblings, 0 replies; 16+ messages in thread
From: Maxime Coquelin @ 2020-06-29 17:24 UTC (permalink / raw)
  To: Matan Azrad; +Cc: dev, Viacheslav Ovsiienko



On 6/29/20 4:01 PM, Matan Azrad wrote:
> Defines new event modes in order to schedule CQ polling.
> 
> Timer thread is created in mode 0 and 1 in order to save CPU utilization.
> 
> v2:
> Add devargs to control the events mode.
> 
> v3:
> rebase.
> Improve mode 0 latency.
> 
> Matan Azrad (3):
>   vdpa/mlx5: optimize notification events
>   vdpa/mlx5: optimize completion queue poll
>   vdpa/mlx5: control completion queue event mode
> 
>  doc/guides/vdpadevs/mlx5.rst        |  31 +++++
>  drivers/vdpa/mlx5/Makefile          |   1 +
>  drivers/vdpa/mlx5/mlx5_vdpa.c       |  57 ++++++++
>  drivers/vdpa/mlx5/mlx5_vdpa.h       |  19 +++
>  drivers/vdpa/mlx5/mlx5_vdpa_event.c | 257 +++++++++++++++++++++++++++++-------
>  5 files changed, 318 insertions(+), 47 deletions(-)
> 

Applied to dpdk-next-virtio/master

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2020-06-29 17:24 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-18 19:11 [dpdk-dev] [PATCH 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
2020-06-18 19:11 ` [dpdk-dev] [PATCH 1/3] vdpa/mlx5: optimize notification events Matan Azrad
2020-06-18 19:11 ` [dpdk-dev] [PATCH 2/3] vdpa/mlx5: optimize completion queue poll Matan Azrad
2020-06-18 19:11 ` [dpdk-dev] [PATCH 3/3] vdpa/mlx5: add traffic control device arguments Matan Azrad
2020-06-25 13:30 ` [dpdk-dev] [PATCH v2 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
2020-06-25 13:30   ` [dpdk-dev] [PATCH v2 1/3] vdpa/mlx5: optimize notification events Matan Azrad
2020-06-29  9:05     ` Maxime Coquelin
2020-06-25 13:30   ` [dpdk-dev] [PATCH v2 2/3] vdpa/mlx5: optimize completion queue poll Matan Azrad
2020-06-29  9:11     ` Maxime Coquelin
2020-06-25 13:30   ` [dpdk-dev] [PATCH v2 3/3] vdpa/mlx5: control completion queue event mode Matan Azrad
2020-06-29  9:16     ` Maxime Coquelin
2020-06-29 14:01   ` [dpdk-dev] [PATCH v3 0/3] vdpa/mlx5: optimize cpu utilization Matan Azrad
2020-06-29 14:01     ` [dpdk-dev] [PATCH v3 1/3] vdpa/mlx5: optimize notification events Matan Azrad
2020-06-29 14:01     ` [dpdk-dev] [PATCH v3 2/3] vdpa/mlx5: optimize completion queue poll Matan Azrad
2020-06-29 14:01     ` [dpdk-dev] [PATCH v3 3/3] vdpa/mlx5: control completion queue event mode Matan Azrad
2020-06-29 17:24     ` [dpdk-dev] [PATCH v3 0/3] vdpa/mlx5: optimize cpu utilization Maxime Coquelin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).