DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] vdpa/mlx5: improve interrupt management
@ 2021-03-18  9:13 Matan Azrad
  2021-03-29 14:09 ` Maxime Coquelin
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Matan Azrad @ 2021-03-18  9:13 UTC (permalink / raw)
  To: dev; +Cc: Maxime Coquelin

The driver should notify the guest for each traffic burst detected by CQ
polling.

The CQ polling trigger is defined by `event_mode` device argument,
either by busy polling on all the CQs or by blocked call to HW
completion event using DevX channel.

Also, the polling event modes can move to blocked call when the
traffic rate is low.

The current blocked call uses the EAL interrupt API suffering a lot
of overhead in the API management and serve all the drivers and
libraries using only single thread.

Use blocking FD of the DevX channel in order to do blocked call
directly by the DevX channel FD mechanism.

Signed-off-by: Matan Azrad <matan@nvidia.com>
Acked-by: Xueming Li <xuemingl@nvidia.com>
---
 doc/guides/vdpadevs/mlx5.rst        |   8 +-
 drivers/vdpa/mlx5/mlx5_vdpa.c       |   8 +-
 drivers/vdpa/mlx5/mlx5_vdpa.h       |   8 +-
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 308 +++++++++++++++---------------------
 4 files changed, 134 insertions(+), 198 deletions(-)

diff --git a/doc/guides/vdpadevs/mlx5.rst b/doc/guides/vdpadevs/mlx5.rst
index 1f2ae6f..9b2f9f1 100644
--- a/doc/guides/vdpadevs/mlx5.rst
+++ b/doc/guides/vdpadevs/mlx5.rst
@@ -129,10 +129,10 @@ Driver options
 
 - ``no_traffic_time`` parameter [int]
 
-  A nonzero value defines the traffic off time, in seconds, that moves the
-  driver to no-traffic mode. In this mode the timer events are stopped and
-  interrupts are configured to the device in order to notify traffic for the
-  driver. Default value is 2s.
+  A nonzero value defines the traffic off time, in polling cycle time units,
+  that moves the driver to no-traffic mode. In this mode the polling is stopped
+  and interrupts are configured to the device in order to notify traffic for the
+  driver. Default value is 16.
 
 - ``event_core`` parameter [int]
 
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index 4c2d886..5d70880 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -44,7 +44,7 @@
 
 #define MLX5_VDPA_MAX_RETRIES 20
 #define MLX5_VDPA_USEC 1000
-#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S 2LLU
+#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX 16LLU
 
 TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
 					      TAILQ_HEAD_INITIALIZER(priv_list);
@@ -632,7 +632,7 @@
 	} else if (strcmp(key, "event_us") == 0) {
 		priv->event_us = (uint32_t)tmp;
 	} else if (strcmp(key, "no_traffic_time") == 0) {
-		priv->no_traffic_time_s = (uint32_t)tmp;
+		priv->no_traffic_max = (uint32_t)tmp;
 	} else if (strcmp(key, "event_core") == 0) {
 		if (tmp >= (unsigned long)n_cores)
 			DRV_LOG(WARNING, "Invalid event_core %s.", val);
@@ -658,7 +658,7 @@
 	priv->event_mode = MLX5_VDPA_EVENT_MODE_FIXED_TIMER;
 	priv->event_us = 0;
 	priv->event_core = -1;
-	priv->no_traffic_time_s = MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S;
+	priv->no_traffic_max = MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX;
 	if (devargs == NULL)
 		return;
 	kvlist = rte_kvargs_parse(devargs->args, NULL);
@@ -671,7 +671,7 @@
 		priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
 	DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
 	DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
-	DRV_LOG(DEBUG, "no traffic time is %u s.", priv->no_traffic_time_s);
+	DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
 }
 
 /**
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index 98c71aa..e4c8575 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -120,16 +120,13 @@ struct mlx5_vdpa_priv {
 	TAILQ_ENTRY(mlx5_vdpa_priv) next;
 	uint8_t configured;
 	pthread_mutex_t vq_config_lock;
-	uint64_t last_traffic_tic;
+	uint64_t no_traffic_counter;
 	pthread_t timer_tid;
-	pthread_mutex_t timer_lock;
-	pthread_cond_t timer_cond;
-	volatile uint8_t timer_on;
 	int event_mode;
 	int event_core; /* Event thread cpu affinity core. */
 	uint32_t event_us;
 	uint32_t timer_delay_us;
-	uint32_t no_traffic_time_s;
+	uint32_t no_traffic_max;
 	uint8_t hw_latency_mode; /* Hardware CQ moderation mode. */
 	uint16_t hw_max_latency_us; /* Hardware CQ moderation period in usec. */
 	uint16_t hw_max_pending_comp; /* Hardware CQ moderation counter. */
@@ -146,7 +143,6 @@ struct mlx5_vdpa_priv {
 	struct mlx5dv_devx_event_channel *eventc;
 	struct mlx5dv_devx_event_channel *err_chnl;
 	struct mlx5dv_devx_uar *uar;
-	struct rte_intr_handle intr_handle;
 	struct rte_intr_handle err_intr_handle;
 	struct mlx5_devx_obj *td;
 	struct mlx5_devx_obj *tiss[16]; /* TIS list for each LAG port. */
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index 86adc86..64a1753 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -35,17 +35,6 @@
 	}
 #ifdef HAVE_IBV_DEVX_EVENT
 	if (priv->eventc) {
-		union {
-			struct mlx5dv_devx_async_event_hdr event_resp;
-			uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr)
-									 + 128];
-		} out;
-
-		/* Clean all pending events. */
-		while (mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
-		       sizeof(out.buf)) >=
-		       (ssize_t)sizeof(out.event_resp.cookie))
-			;
 		mlx5_os_devx_destroy_event_channel(priv->eventc);
 		priv->eventc = NULL;
 	}
@@ -56,8 +45,6 @@
 static int
 mlx5_vdpa_event_qp_global_prepare(struct mlx5_vdpa_priv *priv)
 {
-	int flags, ret;
-
 	if (priv->eventc)
 		return 0;
 	priv->eventc = mlx5_os_devx_create_event_channel(priv->ctx,
@@ -68,12 +55,6 @@
 			rte_errno);
 		goto error;
 	}
-	flags = fcntl(priv->eventc->fd, F_GETFL);
-	ret = fcntl(priv->eventc->fd, F_SETFL, flags | O_NONBLOCK);
-	if (ret) {
-		DRV_LOG(ERR, "Failed to change event channel FD.");
-		goto error;
-	}
 	/*
 	 * This PMD always claims the write memory barrier on UAR
 	 * registers writings, it is safe to allocate UAR with any
@@ -237,122 +218,112 @@
 		pthread_yield();
 }
 
-static void *
-mlx5_vdpa_poll_handle(void *arg)
+/* Notify virtio device for specific virtq new traffic. */
+static uint32_t
+mlx5_vdpa_queue_complete(struct mlx5_vdpa_cq *cq)
 {
-	struct mlx5_vdpa_priv *priv = arg;
-	int i;
-	struct mlx5_vdpa_cq *cq;
-	uint32_t max;
-	uint64_t current_tic;
-
-	pthread_mutex_lock(&priv->timer_lock);
-	while (!priv->timer_on)
-		pthread_cond_wait(&priv->timer_cond, &priv->timer_lock);
-	pthread_mutex_unlock(&priv->timer_lock);
-	priv->timer_delay_us = priv->event_mode ==
-					    MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER ?
-					      MLX5_VDPA_DEFAULT_TIMER_DELAY_US :
-								 priv->event_us;
-	while (1) {
-		max = 0;
-		pthread_mutex_lock(&priv->vq_config_lock);
-		for (i = 0; i < priv->nr_virtqs; i++) {
-			cq = &priv->virtqs[i].eqp.cq;
-			if (cq->cq_obj.cq && !cq->armed) {
-				uint32_t comp = mlx5_vdpa_cq_poll(cq);
-
-				if (comp) {
-					/* Notify guest for descs consuming. */
-					if (cq->callfd != -1)
-						eventfd_write(cq->callfd,
-							      (eventfd_t)1);
-					if (comp > max)
-						max = comp;
-				}
-			}
-		}
-		current_tic = rte_rdtsc();
-		if (!max) {
-			/* No traffic ? stop timer and load interrupts. */
-			if (current_tic - priv->last_traffic_tic >=
-			    rte_get_timer_hz() * priv->no_traffic_time_s) {
-				DRV_LOG(DEBUG, "Device %s traffic was stopped.",
-					priv->vdev->device->name);
-				mlx5_vdpa_arm_all_cqs(priv);
-				pthread_mutex_unlock(&priv->vq_config_lock);
-				pthread_mutex_lock(&priv->timer_lock);
-				priv->timer_on = 0;
-				while (!priv->timer_on)
-					pthread_cond_wait(&priv->timer_cond,
-							  &priv->timer_lock);
-				pthread_mutex_unlock(&priv->timer_lock);
-				priv->timer_delay_us = priv->event_mode ==
-					    MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER ?
-					      MLX5_VDPA_DEFAULT_TIMER_DELAY_US :
-								 priv->event_us;
-				continue;
-			}
-		} else {
-			priv->last_traffic_tic = current_tic;
+	uint32_t comp = 0;
+
+	if (cq->cq_obj.cq) {
+		comp = mlx5_vdpa_cq_poll(cq);
+		if (comp) {
+			if (cq->callfd != -1)
+				eventfd_write(cq->callfd, (eventfd_t)1);
+			cq->armed = 0;
 		}
-		pthread_mutex_unlock(&priv->vq_config_lock);
-		mlx5_vdpa_timer_sleep(priv, max);
 	}
-	return NULL;
+	return comp;
 }
 
-static void
-mlx5_vdpa_interrupt_handler(void *cb_arg)
+/* Notify virtio device for any virtq new traffic. */
+static uint32_t
+mlx5_vdpa_queues_complete(struct mlx5_vdpa_priv *priv)
+{
+	int i;
+	uint32_t max = 0;
+
+	for (i = 0; i < priv->nr_virtqs; i++) {
+		struct mlx5_vdpa_cq *cq = &priv->virtqs[i].eqp.cq;
+		uint32_t comp = mlx5_vdpa_queue_complete(cq);
+
+		if (comp > max)
+			max = comp;
+	}
+	return max;
+}
+
+/* Wait on all CQs channel for completion event. */
+static struct mlx5_vdpa_cq *
+mlx5_vdpa_event_wait(struct mlx5_vdpa_priv *priv __rte_unused)
 {
-	struct mlx5_vdpa_priv *priv = cb_arg;
 #ifdef HAVE_IBV_DEVX_EVENT
 	union {
 		struct mlx5dv_devx_async_event_hdr event_resp;
 		uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
 	} out;
+	int ret = mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
+					    sizeof(out.buf));
 
-	pthread_mutex_lock(&priv->vq_config_lock);
-	while (mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
-					 sizeof(out.buf)) >=
-				       (ssize_t)sizeof(out.event_resp.cookie)) {
-		struct mlx5_vdpa_cq *cq = (struct mlx5_vdpa_cq *)
-					       (uintptr_t)out.event_resp.cookie;
-		struct mlx5_vdpa_event_qp *eqp = container_of(cq,
-						 struct mlx5_vdpa_event_qp, cq);
-		struct mlx5_vdpa_virtq *virtq = container_of(eqp,
-						   struct mlx5_vdpa_virtq, eqp);
-
-		if (!virtq->enable)
-			continue;
-		mlx5_vdpa_cq_poll(cq);
-		/* Notify guest for descs consuming. */
-		if (cq->callfd != -1)
-			eventfd_write(cq->callfd, (eventfd_t)1);
-		if (priv->event_mode == MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
-			mlx5_vdpa_cq_arm(priv, cq);
-			pthread_mutex_unlock(&priv->vq_config_lock);
-			return;
-		}
-		/* Don't arm again - timer will take control. */
-		DRV_LOG(DEBUG, "Device %s virtq %d cq %d event was captured."
-			" Timer is %s, cq ci is %u.\n",
-			priv->vdev->device->name,
-			(int)virtq->index, cq->cq_obj.cq->id,
-			priv->timer_on ? "on" : "off", cq->cq_ci);
-		cq->armed = 0;
-	}
+	if (ret >= 0)
+		return (struct mlx5_vdpa_cq *)(uintptr_t)out.event_resp.cookie;
+	DRV_LOG(INFO, "Got error in devx_get_event, ret = %d, errno = %d.",
+		ret, errno);
 #endif
+	return NULL;
+}
 
-	/* Traffic detected: make sure timer is on. */
-	priv->last_traffic_tic = rte_rdtsc();
-	pthread_mutex_lock(&priv->timer_lock);
-	if (!priv->timer_on) {
-		priv->timer_on = 1;
-		pthread_cond_signal(&priv->timer_cond);
+static void *
+mlx5_vdpa_event_handle(void *arg)
+{
+	struct mlx5_vdpa_priv *priv = arg;
+	struct mlx5_vdpa_cq *cq;
+	uint32_t max;
+
+	switch (priv->event_mode) {
+	case MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER:
+	case MLX5_VDPA_EVENT_MODE_FIXED_TIMER:
+		priv->timer_delay_us = priv->event_us;
+		while (1) {
+			pthread_mutex_lock(&priv->vq_config_lock);
+			max = mlx5_vdpa_queues_complete(priv);
+			if (max == 0 && priv->no_traffic_counter++ >=
+			    priv->no_traffic_max) {
+				DRV_LOG(DEBUG, "Device %s traffic was stopped.",
+					priv->vdev->device->name);
+				mlx5_vdpa_arm_all_cqs(priv);
+				do {
+					pthread_mutex_unlock
+							(&priv->vq_config_lock);
+					cq = mlx5_vdpa_event_wait(priv);
+					pthread_mutex_lock
+							(&priv->vq_config_lock);
+					if (cq == NULL ||
+					       mlx5_vdpa_queue_complete(cq) > 0)
+						break;
+				} while (1);
+				priv->timer_delay_us = priv->event_us;
+				priv->no_traffic_counter = 0;
+			} else if (max != 0) {
+				priv->no_traffic_counter = 0;
+			}
+			pthread_mutex_unlock(&priv->vq_config_lock);
+			mlx5_vdpa_timer_sleep(priv, max);
+		}
+		return NULL;
+	case MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT:
+		do {
+			cq = mlx5_vdpa_event_wait(priv);
+			if (cq != NULL) {
+				pthread_mutex_lock(&priv->vq_config_lock);
+				if (mlx5_vdpa_queue_complete(cq) > 0)
+					mlx5_vdpa_cq_arm(priv, cq);
+				pthread_mutex_unlock(&priv->vq_config_lock);
+			}
+		} while (1);
+		return NULL;
+	default:
+		return NULL;
 	}
-	pthread_mutex_unlock(&priv->timer_lock);
-	pthread_mutex_unlock(&priv->vq_config_lock);
 }
 
 static void
@@ -510,80 +481,49 @@
 	if (!priv->eventc)
 		/* All virtqs are in poll mode. */
 		return 0;
-	if (priv->event_mode != MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
-		pthread_mutex_init(&priv->timer_lock, NULL);
-		pthread_cond_init(&priv->timer_cond, NULL);
-		priv->timer_on = 0;
-		pthread_attr_init(&attr);
-		CPU_ZERO(&cpuset);
-		if (priv->event_core != -1)
-			CPU_SET(priv->event_core, &cpuset);
-		else
-			cpuset = rte_lcore_cpuset(rte_get_main_lcore());
-		ret = pthread_attr_setaffinity_np(&attr, sizeof(cpuset),
-						  &cpuset);
-		if (ret) {
-			DRV_LOG(ERR, "Failed to set thread affinity.");
-			return -1;
-		}
-		ret = pthread_attr_setschedpolicy(&attr, SCHED_RR);
-		if (ret) {
-			DRV_LOG(ERR, "Failed to set thread sched policy = RR.");
-			return -1;
-		}
-		ret = pthread_attr_setschedparam(&attr, &sp);
-		if (ret) {
-			DRV_LOG(ERR, "Failed to set thread priority.");
-			return -1;
-		}
-		ret = pthread_create(&priv->timer_tid, &attr,
-				     mlx5_vdpa_poll_handle, (void *)priv);
-		if (ret) {
-			DRV_LOG(ERR, "Failed to create timer thread.");
-			return -1;
-		}
-		snprintf(name, sizeof(name), "vDPA-mlx5-%d", priv->vid);
-		ret = pthread_setname_np(priv->timer_tid, name);
-		if (ret) {
-			DRV_LOG(ERR, "Failed to set timer thread name.");
-			return -1;
-		}
+	pthread_attr_init(&attr);
+	CPU_ZERO(&cpuset);
+	if (priv->event_core != -1)
+		CPU_SET(priv->event_core, &cpuset);
+	else
+		cpuset = rte_lcore_cpuset(rte_get_main_lcore());
+	ret = pthread_attr_setaffinity_np(&attr, sizeof(cpuset),
+					  &cpuset);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to set thread affinity.");
+		return -1;
 	}
-	priv->intr_handle.fd = priv->eventc->fd;
-	priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
-	if (rte_intr_callback_register(&priv->intr_handle,
-				       mlx5_vdpa_interrupt_handler, priv)) {
-		priv->intr_handle.fd = 0;
-		DRV_LOG(ERR, "Failed to register CQE interrupt %d.", rte_errno);
-		goto error;
+	ret = pthread_attr_setschedpolicy(&attr, SCHED_RR);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to set thread sched policy = RR.");
+		return -1;
 	}
+	ret = pthread_attr_setschedparam(&attr, &sp);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to set thread priority.");
+		return -1;
+	}
+	ret = pthread_create(&priv->timer_tid, &attr,
+			     mlx5_vdpa_event_handle, (void *)priv);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to create timer thread.");
+		return -1;
+	}
+	snprintf(name, sizeof(name), "vDPA-mlx5-%d", priv->vid);
+	ret = pthread_setname_np(priv->timer_tid, name);
+	if (ret)
+		DRV_LOG(ERR, "Failed to set timer thread name.");
+	else
+		DRV_LOG(DEBUG, "Device %s thread name: %s.",
+			priv->vdev->device->name, name);
 	return 0;
-error:
-	mlx5_vdpa_cqe_event_unset(priv);
-	return -1;
 }
 
 void
 mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv)
 {
-	int retries = MLX5_VDPA_INTR_RETRIES;
-	int ret = -EAGAIN;
 	void *status;
 
-	if (priv->intr_handle.fd) {
-		while (retries-- && ret == -EAGAIN) {
-			ret = rte_intr_callback_unregister(&priv->intr_handle,
-						    mlx5_vdpa_interrupt_handler,
-						    priv);
-			if (ret == -EAGAIN) {
-				DRV_LOG(DEBUG, "Try again to unregister fd %d "
-					"of CQ interrupt, retries = %d.",
-					priv->intr_handle.fd, retries);
-				rte_pause();
-			}
-		}
-		memset(&priv->intr_handle, 0, sizeof(priv->intr_handle));
-	}
 	if (priv->timer_tid) {
 		pthread_cancel(priv->timer_tid);
 		pthread_join(priv->timer_tid, &status);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH] vdpa/mlx5: improve interrupt management
  2021-03-18  9:13 [dpdk-dev] [PATCH] vdpa/mlx5: improve interrupt management Matan Azrad
@ 2021-03-29 14:09 ` Maxime Coquelin
  2021-04-07  6:49 ` Xia, Chenbo
  2021-05-02 10:45 ` [dpdk-dev] [PATCH v2] " Matan Azrad
  2 siblings, 0 replies; 9+ messages in thread
From: Maxime Coquelin @ 2021-03-29 14:09 UTC (permalink / raw)
  To: Matan Azrad, dev



On 3/18/21 10:13 AM, Matan Azrad wrote:
> The driver should notify the guest for each traffic burst detected by CQ
> polling.
> 
> The CQ polling trigger is defined by `event_mode` device argument,
> either by busy polling on all the CQs or by blocked call to HW
> completion event using DevX channel.
> 
> Also, the polling event modes can move to blocked call when the
> traffic rate is low.
> 
> The current blocked call uses the EAL interrupt API suffering a lot
> of overhead in the API management and serve all the drivers and
> libraries using only single thread.
> 
> Use blocking FD of the DevX channel in order to do blocked call
> directly by the DevX channel FD mechanism.
> 
> Signed-off-by: Matan Azrad <matan@nvidia.com>
> Acked-by: Xueming Li <xuemingl@nvidia.com>
> ---
>  doc/guides/vdpadevs/mlx5.rst        |   8 +-
>  drivers/vdpa/mlx5/mlx5_vdpa.c       |   8 +-
>  drivers/vdpa/mlx5/mlx5_vdpa.h       |   8 +-
>  drivers/vdpa/mlx5/mlx5_vdpa_event.c | 308 +++++++++++++++---------------------
>  4 files changed, 134 insertions(+), 198 deletions(-)
> 


Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH] vdpa/mlx5: improve interrupt management
  2021-03-18  9:13 [dpdk-dev] [PATCH] vdpa/mlx5: improve interrupt management Matan Azrad
  2021-03-29 14:09 ` Maxime Coquelin
@ 2021-04-07  6:49 ` Xia, Chenbo
  2021-04-11  9:07   ` Thomas Monjalon
  2021-05-02 10:45 ` [dpdk-dev] [PATCH v2] " Matan Azrad
  2 siblings, 1 reply; 9+ messages in thread
From: Xia, Chenbo @ 2021-04-07  6:49 UTC (permalink / raw)
  To: Matan Azrad, dev; +Cc: Maxime Coquelin

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Matan Azrad
> Sent: Thursday, March 18, 2021 5:13 PM
> To: dev@dpdk.org
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> Subject: [dpdk-dev] [PATCH] vdpa/mlx5: improve interrupt management
> 
> The driver should notify the guest for each traffic burst detected by CQ
> polling.
> 
> The CQ polling trigger is defined by `event_mode` device argument,
> either by busy polling on all the CQs or by blocked call to HW
> completion event using DevX channel.
> 
> Also, the polling event modes can move to blocked call when the
> traffic rate is low.
> 
> The current blocked call uses the EAL interrupt API suffering a lot
> of overhead in the API management and serve all the drivers and
> libraries using only single thread.
> 
> Use blocking FD of the DevX channel in order to do blocked call
> directly by the DevX channel FD mechanism.
> 
> Signed-off-by: Matan Azrad <matan@nvidia.com>
> Acked-by: Xueming Li <xuemingl@nvidia.com>
> ---
>  doc/guides/vdpadevs/mlx5.rst        |   8 +-
>  drivers/vdpa/mlx5/mlx5_vdpa.c       |   8 +-
>  drivers/vdpa/mlx5/mlx5_vdpa.h       |   8 +-
>  drivers/vdpa/mlx5/mlx5_vdpa_event.c | 308 +++++++++++++++--------------------
> -
>  4 files changed, 134 insertions(+), 198 deletions(-)
> 
> --
> 1.8.3.1

Patch applied to next-virtio/main with conflict resolved.

Thanks!


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH] vdpa/mlx5: improve interrupt management
  2021-04-07  6:49 ` Xia, Chenbo
@ 2021-04-11  9:07   ` Thomas Monjalon
  2021-04-29  9:48     ` Maxime Coquelin
  0 siblings, 1 reply; 9+ messages in thread
From: Thomas Monjalon @ 2021-04-11  9:07 UTC (permalink / raw)
  To: Matan Azrad
  Cc: dev, Maxime Coquelin, Xia, Chenbo, ferruh.yigit, david.marchand

07/04/2021 08:49, Xia, Chenbo:
> > Signed-off-by: Matan Azrad <matan@nvidia.com>
> > Acked-by: Xueming Li <xuemingl@nvidia.com>
> 
> Patch applied to next-virtio/main with conflict resolved.
> 
> Thanks!

Sorry it cannot be pulled in the main tree because it breaks
compilation with musl libc.

It can be fixed in the same way as in
41b5a7a8494e ("vdpa/mlx5: replace pthread functions unavailable in musl")
The function pthread_attr_setaffinity_np() must be replaced with
pthread_setaffinity_np() if moved after the call to pthread_create().

We could add a checkpatch check for forbidden functions like
	pthread_yield, pthread_attr_setaffinity_np
and forbidden includes:
	<error.h>, <sys/fcntl.h>



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH] vdpa/mlx5: improve interrupt management
  2021-04-11  9:07   ` Thomas Monjalon
@ 2021-04-29  9:48     ` Maxime Coquelin
  2021-05-02 10:47       ` Matan Azrad
  0 siblings, 1 reply; 9+ messages in thread
From: Maxime Coquelin @ 2021-04-29  9:48 UTC (permalink / raw)
  To: Thomas Monjalon, Matan Azrad
  Cc: dev, Xia, Chenbo, ferruh.yigit, david.marchand

Hi Matan,

On 4/11/21 11:07 AM, Thomas Monjalon wrote:
> 07/04/2021 08:49, Xia, Chenbo:
>>> Signed-off-by: Matan Azrad <matan@nvidia.com>
>>> Acked-by: Xueming Li <xuemingl@nvidia.com>
>>
>> Patch applied to next-virtio/main with conflict resolved.
>>
>> Thanks!
> 
> Sorry it cannot be pulled in the main tree because it breaks
> compilation with musl libc.
> 
> It can be fixed in the same way as in
> 41b5a7a8494e ("vdpa/mlx5: replace pthread functions unavailable in musl")
> The function pthread_attr_setaffinity_np() must be replaced with
> pthread_setaffinity_np() if moved after the call to pthread_create().
> 
> We could add a checkpatch check for forbidden functions like
> 	pthread_yield, pthread_attr_setaffinity_np
> and forbidden includes:
> 	<error.h>, <sys/fcntl.h>
> 
> 

Could you please send a new version taking care of Thomas' request.

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [dpdk-dev] [PATCH v2] vdpa/mlx5: improve interrupt management
  2021-03-18  9:13 [dpdk-dev] [PATCH] vdpa/mlx5: improve interrupt management Matan Azrad
  2021-03-29 14:09 ` Maxime Coquelin
  2021-04-07  6:49 ` Xia, Chenbo
@ 2021-05-02 10:45 ` Matan Azrad
  2021-05-03 14:27   ` Maxime Coquelin
  2021-05-04  8:29   ` Maxime Coquelin
  2 siblings, 2 replies; 9+ messages in thread
From: Matan Azrad @ 2021-05-02 10:45 UTC (permalink / raw)
  To: dev; +Cc: matan, thomas, chenbo.xia, maxime.coquelin, Xueming Li

The driver should notify the guest for each traffic burst detected by CQ
polling.

The CQ polling trigger is defined by `event_mode` device argument,
either by busy polling on all the CQs or by blocked call to HW
completion event using DevX channel.

Also, the polling event modes can move to blocked call when the
traffic rate is low.

The current blocked call uses the EAL interrupt API suffering a lot
of overhead in the API management and serve all the drivers and
libraries using only single thread.

Use blocking FD of the DevX channel in order to do blocked call
directly by the DevX channel FD mechanism.

Signed-off-by: Matan Azrad <matan@nvidia.com>
Acked-by: Xueming Li <xuemingl@nvidia.com>
---
 doc/guides/vdpadevs/mlx5.rst        |   8 +-
 drivers/vdpa/mlx5/mlx5_vdpa.c       |   8 +-
 drivers/vdpa/mlx5/mlx5_vdpa.h       |   8 +-
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 304 +++++++++++-----------------
 4 files changed, 130 insertions(+), 198 deletions(-)

v2:
Rebased on top of latest updates by Thomas.


diff --git a/doc/guides/vdpadevs/mlx5.rst b/doc/guides/vdpadevs/mlx5.rst
index 1f2ae6fb92..9b2f9f12c7 100644
--- a/doc/guides/vdpadevs/mlx5.rst
+++ b/doc/guides/vdpadevs/mlx5.rst
@@ -129,10 +129,10 @@ Driver options
 
 - ``no_traffic_time`` parameter [int]
 
-  A nonzero value defines the traffic off time, in seconds, that moves the
-  driver to no-traffic mode. In this mode the timer events are stopped and
-  interrupts are configured to the device in order to notify traffic for the
-  driver. Default value is 2s.
+  A nonzero value defines the traffic off time, in polling cycle time units,
+  that moves the driver to no-traffic mode. In this mode the polling is stopped
+  and interrupts are configured to the device in order to notify traffic for the
+  driver. Default value is 16.
 
 - ``event_core`` parameter [int]
 
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index 898e50f807..619e31d61c 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -44,7 +44,7 @@
 
 #define MLX5_VDPA_MAX_RETRIES 20
 #define MLX5_VDPA_USEC 1000
-#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S 2LLU
+#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX 16LLU
 
 TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
 					      TAILQ_HEAD_INITIALIZER(priv_list);
@@ -632,7 +632,7 @@ mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
 	} else if (strcmp(key, "event_us") == 0) {
 		priv->event_us = (uint32_t)tmp;
 	} else if (strcmp(key, "no_traffic_time") == 0) {
-		priv->no_traffic_time_s = (uint32_t)tmp;
+		priv->no_traffic_max = (uint32_t)tmp;
 	} else if (strcmp(key, "event_core") == 0) {
 		if (tmp >= (unsigned long)n_cores)
 			DRV_LOG(WARNING, "Invalid event_core %s.", val);
@@ -658,7 +658,7 @@ mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv)
 	priv->event_mode = MLX5_VDPA_EVENT_MODE_FIXED_TIMER;
 	priv->event_us = 0;
 	priv->event_core = -1;
-	priv->no_traffic_time_s = MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S;
+	priv->no_traffic_max = MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX;
 	if (devargs == NULL)
 		return;
 	kvlist = rte_kvargs_parse(devargs->args, NULL);
@@ -671,7 +671,7 @@ mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv)
 		priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
 	DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
 	DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
-	DRV_LOG(DEBUG, "no traffic time is %u s.", priv->no_traffic_time_s);
+	DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
 }
 
 /**
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index d93b430c97..722c72b65e 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -120,16 +120,13 @@ struct mlx5_vdpa_priv {
 	TAILQ_ENTRY(mlx5_vdpa_priv) next;
 	uint8_t configured;
 	pthread_mutex_t vq_config_lock;
-	uint64_t last_traffic_tic;
+	uint64_t no_traffic_counter;
 	pthread_t timer_tid;
-	pthread_mutex_t timer_lock;
-	pthread_cond_t timer_cond;
-	volatile uint8_t timer_on;
 	int event_mode;
 	int event_core; /* Event thread cpu affinity core. */
 	uint32_t event_us;
 	uint32_t timer_delay_us;
-	uint32_t no_traffic_time_s;
+	uint32_t no_traffic_max;
 	uint8_t hw_latency_mode; /* Hardware CQ moderation mode. */
 	uint16_t hw_max_latency_us; /* Hardware CQ moderation period in usec. */
 	uint16_t hw_max_pending_comp; /* Hardware CQ moderation counter. */
@@ -146,7 +143,6 @@ struct mlx5_vdpa_priv {
 	struct mlx5dv_devx_event_channel *eventc;
 	struct mlx5dv_devx_event_channel *err_chnl;
 	struct mlx5dv_devx_uar *uar;
-	struct rte_intr_handle intr_handle;
 	struct rte_intr_handle err_intr_handle;
 	struct mlx5_devx_obj *td;
 	struct mlx5_devx_obj *tiss[16]; /* TIS list for each LAG port. */
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index ca6e985336..88f6a4256d 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -36,17 +36,6 @@ mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
 	}
 #ifdef HAVE_IBV_DEVX_EVENT
 	if (priv->eventc) {
-		union {
-			struct mlx5dv_devx_async_event_hdr event_resp;
-			uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr)
-									 + 128];
-		} out;
-
-		/* Clean all pending events. */
-		while (mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
-		       sizeof(out.buf)) >=
-		       (ssize_t)sizeof(out.event_resp.cookie))
-			;
 		mlx5_os_devx_destroy_event_channel(priv->eventc);
 		priv->eventc = NULL;
 	}
@@ -57,8 +46,6 @@ mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
 static int
 mlx5_vdpa_event_qp_global_prepare(struct mlx5_vdpa_priv *priv)
 {
-	int flags, ret;
-
 	if (priv->eventc)
 		return 0;
 	priv->eventc = mlx5_os_devx_create_event_channel(priv->ctx,
@@ -69,12 +56,6 @@ mlx5_vdpa_event_qp_global_prepare(struct mlx5_vdpa_priv *priv)
 			rte_errno);
 		goto error;
 	}
-	flags = fcntl(priv->eventc->fd, F_GETFL);
-	ret = fcntl(priv->eventc->fd, F_SETFL, flags | O_NONBLOCK);
-	if (ret) {
-		DRV_LOG(ERR, "Failed to change event channel FD.");
-		goto error;
-	}
 	/*
 	 * This PMD always claims the write memory barrier on UAR
 	 * registers writings, it is safe to allocate UAR with any
@@ -238,122 +219,112 @@ mlx5_vdpa_timer_sleep(struct mlx5_vdpa_priv *priv, uint32_t max)
 		sched_yield();
 }
 
-static void *
-mlx5_vdpa_poll_handle(void *arg)
+/* Notify virtio device for specific virtq new traffic. */
+static uint32_t
+mlx5_vdpa_queue_complete(struct mlx5_vdpa_cq *cq)
 {
-	struct mlx5_vdpa_priv *priv = arg;
-	int i;
-	struct mlx5_vdpa_cq *cq;
-	uint32_t max;
-	uint64_t current_tic;
-
-	pthread_mutex_lock(&priv->timer_lock);
-	while (!priv->timer_on)
-		pthread_cond_wait(&priv->timer_cond, &priv->timer_lock);
-	pthread_mutex_unlock(&priv->timer_lock);
-	priv->timer_delay_us = priv->event_mode ==
-					    MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER ?
-					      MLX5_VDPA_DEFAULT_TIMER_DELAY_US :
-								 priv->event_us;
-	while (1) {
-		max = 0;
-		pthread_mutex_lock(&priv->vq_config_lock);
-		for (i = 0; i < priv->nr_virtqs; i++) {
-			cq = &priv->virtqs[i].eqp.cq;
-			if (cq->cq_obj.cq && !cq->armed) {
-				uint32_t comp = mlx5_vdpa_cq_poll(cq);
-
-				if (comp) {
-					/* Notify guest for descs consuming. */
-					if (cq->callfd != -1)
-						eventfd_write(cq->callfd,
-							      (eventfd_t)1);
-					if (comp > max)
-						max = comp;
-				}
-			}
+	uint32_t comp = 0;
+
+	if (cq->cq_obj.cq) {
+		comp = mlx5_vdpa_cq_poll(cq);
+		if (comp) {
+			if (cq->callfd != -1)
+				eventfd_write(cq->callfd, (eventfd_t)1);
+			cq->armed = 0;
 		}
-		current_tic = rte_rdtsc();
-		if (!max) {
-			/* No traffic ? stop timer and load interrupts. */
-			if (current_tic - priv->last_traffic_tic >=
-			    rte_get_timer_hz() * priv->no_traffic_time_s) {
-				DRV_LOG(DEBUG, "Device %s traffic was stopped.",
-					priv->vdev->device->name);
-				mlx5_vdpa_arm_all_cqs(priv);
-				pthread_mutex_unlock(&priv->vq_config_lock);
-				pthread_mutex_lock(&priv->timer_lock);
-				priv->timer_on = 0;
-				while (!priv->timer_on)
-					pthread_cond_wait(&priv->timer_cond,
-							  &priv->timer_lock);
-				pthread_mutex_unlock(&priv->timer_lock);
-				priv->timer_delay_us = priv->event_mode ==
-					    MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER ?
-					      MLX5_VDPA_DEFAULT_TIMER_DELAY_US :
-								 priv->event_us;
-				continue;
-			}
-		} else {
-			priv->last_traffic_tic = current_tic;
-		}
-		pthread_mutex_unlock(&priv->vq_config_lock);
-		mlx5_vdpa_timer_sleep(priv, max);
 	}
-	return NULL;
+	return comp;
 }
 
-static void
-mlx5_vdpa_interrupt_handler(void *cb_arg)
+/* Notify virtio device for any virtq new traffic. */
+static uint32_t
+mlx5_vdpa_queues_complete(struct mlx5_vdpa_priv *priv)
+{
+	int i;
+	uint32_t max = 0;
+
+	for (i = 0; i < priv->nr_virtqs; i++) {
+		struct mlx5_vdpa_cq *cq = &priv->virtqs[i].eqp.cq;
+		uint32_t comp = mlx5_vdpa_queue_complete(cq);
+
+		if (comp > max)
+			max = comp;
+	}
+	return max;
+}
+
+/* Wait on all CQs channel for completion event. */
+static struct mlx5_vdpa_cq *
+mlx5_vdpa_event_wait(struct mlx5_vdpa_priv *priv __rte_unused)
 {
-	struct mlx5_vdpa_priv *priv = cb_arg;
 #ifdef HAVE_IBV_DEVX_EVENT
 	union {
 		struct mlx5dv_devx_async_event_hdr event_resp;
 		uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
 	} out;
+	int ret = mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
+					    sizeof(out.buf));
 
-	pthread_mutex_lock(&priv->vq_config_lock);
-	while (mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
-					 sizeof(out.buf)) >=
-				       (ssize_t)sizeof(out.event_resp.cookie)) {
-		struct mlx5_vdpa_cq *cq = (struct mlx5_vdpa_cq *)
-					       (uintptr_t)out.event_resp.cookie;
-		struct mlx5_vdpa_event_qp *eqp = container_of(cq,
-						 struct mlx5_vdpa_event_qp, cq);
-		struct mlx5_vdpa_virtq *virtq = container_of(eqp,
-						   struct mlx5_vdpa_virtq, eqp);
-
-		if (!virtq->enable)
-			continue;
-		mlx5_vdpa_cq_poll(cq);
-		/* Notify guest for descs consuming. */
-		if (cq->callfd != -1)
-			eventfd_write(cq->callfd, (eventfd_t)1);
-		if (priv->event_mode == MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
-			mlx5_vdpa_cq_arm(priv, cq);
-			pthread_mutex_unlock(&priv->vq_config_lock);
-			return;
-		}
-		/* Don't arm again - timer will take control. */
-		DRV_LOG(DEBUG, "Device %s virtq %d cq %d event was captured."
-			" Timer is %s, cq ci is %u.\n",
-			priv->vdev->device->name,
-			(int)virtq->index, cq->cq_obj.cq->id,
-			priv->timer_on ? "on" : "off", cq->cq_ci);
-		cq->armed = 0;
-	}
+	if (ret >= 0)
+		return (struct mlx5_vdpa_cq *)(uintptr_t)out.event_resp.cookie;
+	DRV_LOG(INFO, "Got error in devx_get_event, ret = %d, errno = %d.",
+		ret, errno);
 #endif
+	return NULL;
+}
 
-	/* Traffic detected: make sure timer is on. */
-	priv->last_traffic_tic = rte_rdtsc();
-	pthread_mutex_lock(&priv->timer_lock);
-	if (!priv->timer_on) {
-		priv->timer_on = 1;
-		pthread_cond_signal(&priv->timer_cond);
+static void *
+mlx5_vdpa_event_handle(void *arg)
+{
+	struct mlx5_vdpa_priv *priv = arg;
+	struct mlx5_vdpa_cq *cq;
+	uint32_t max;
+
+	switch (priv->event_mode) {
+	case MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER:
+	case MLX5_VDPA_EVENT_MODE_FIXED_TIMER:
+		priv->timer_delay_us = priv->event_us;
+		while (1) {
+			pthread_mutex_lock(&priv->vq_config_lock);
+			max = mlx5_vdpa_queues_complete(priv);
+			if (max == 0 && priv->no_traffic_counter++ >=
+			    priv->no_traffic_max) {
+				DRV_LOG(DEBUG, "Device %s traffic was stopped.",
+					priv->vdev->device->name);
+				mlx5_vdpa_arm_all_cqs(priv);
+				do {
+					pthread_mutex_unlock
+							(&priv->vq_config_lock);
+					cq = mlx5_vdpa_event_wait(priv);
+					pthread_mutex_lock
+							(&priv->vq_config_lock);
+					if (cq == NULL ||
+					       mlx5_vdpa_queue_complete(cq) > 0)
+						break;
+				} while (1);
+				priv->timer_delay_us = priv->event_us;
+				priv->no_traffic_counter = 0;
+			} else if (max != 0) {
+				priv->no_traffic_counter = 0;
+			}
+			pthread_mutex_unlock(&priv->vq_config_lock);
+			mlx5_vdpa_timer_sleep(priv, max);
+		}
+		return NULL;
+	case MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT:
+		do {
+			cq = mlx5_vdpa_event_wait(priv);
+			if (cq != NULL) {
+				pthread_mutex_lock(&priv->vq_config_lock);
+				if (mlx5_vdpa_queue_complete(cq) > 0)
+					mlx5_vdpa_cq_arm(priv, cq);
+				pthread_mutex_unlock(&priv->vq_config_lock);
+			}
+		} while (1);
+		return NULL;
+	default:
+		return NULL;
 	}
-	pthread_mutex_unlock(&priv->timer_lock);
-	pthread_mutex_unlock(&priv->vq_config_lock);
 }
 
 static void
@@ -511,80 +482,45 @@ mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv)
 	if (!priv->eventc)
 		/* All virtqs are in poll mode. */
 		return 0;
-	if (priv->event_mode != MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
-		pthread_mutex_init(&priv->timer_lock, NULL);
-		pthread_cond_init(&priv->timer_cond, NULL);
-		priv->timer_on = 0;
-		pthread_attr_init(&attr);
-		ret = pthread_attr_setschedpolicy(&attr, SCHED_RR);
-		if (ret) {
-			DRV_LOG(ERR, "Failed to set thread sched policy = RR.");
-			return -1;
-		}
-		ret = pthread_attr_setschedparam(&attr, &sp);
-		if (ret) {
-			DRV_LOG(ERR, "Failed to set thread priority.");
-			return -1;
-		}
-		ret = pthread_create(&priv->timer_tid, &attr,
-				     mlx5_vdpa_poll_handle, (void *)priv);
-		if (ret) {
-			DRV_LOG(ERR, "Failed to create timer thread.");
-			return -1;
-		}
-		CPU_ZERO(&cpuset);
-		if (priv->event_core != -1)
-			CPU_SET(priv->event_core, &cpuset);
-		else
-			cpuset = rte_lcore_cpuset(rte_get_main_lcore());
-		ret = pthread_setaffinity_np(priv->timer_tid,
-					     sizeof(cpuset), &cpuset);
-		if (ret) {
-			DRV_LOG(ERR, "Failed to set thread affinity.");
-			goto error;
-		}
-		snprintf(name, sizeof(name), "vDPA-mlx5-%d", priv->vid);
-		ret = rte_thread_setname(priv->timer_tid, name);
-		if (ret) {
-			DRV_LOG(ERR, "Failed to set timer thread name.");
-			return -1;
-		}
+	pthread_attr_init(&attr);
+	ret = pthread_attr_setschedpolicy(&attr, SCHED_RR);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to set thread sched policy = RR.");
+		return -1;
 	}
-	priv->intr_handle.fd = priv->eventc->fd;
-	priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
-	if (rte_intr_callback_register(&priv->intr_handle,
-				       mlx5_vdpa_interrupt_handler, priv)) {
-		priv->intr_handle.fd = 0;
-		DRV_LOG(ERR, "Failed to register CQE interrupt %d.", rte_errno);
-		goto error;
+	ret = pthread_attr_setschedparam(&attr, &sp);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to set thread priority.");
+		return -1;
 	}
+	ret = pthread_create(&priv->timer_tid, &attr, mlx5_vdpa_event_handle,
+			     (void *)priv);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to create timer thread.");
+		return -1;
+	}
+	CPU_ZERO(&cpuset);
+	if (priv->event_core != -1)
+		CPU_SET(priv->event_core, &cpuset);
+	else
+		cpuset = rte_lcore_cpuset(rte_get_main_lcore());
+	ret = pthread_setaffinity_np(priv->timer_tid, sizeof(cpuset), &cpuset);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to set thread affinity.");
+		return -1;
+	}
+	snprintf(name, sizeof(name), "vDPA-mlx5-%d", priv->vid);
+	ret = rte_thread_setname(priv->timer_tid, name);
+	if (ret)
+		DRV_LOG(DEBUG, "Cannot set timer thread name.");
 	return 0;
-error:
-	mlx5_vdpa_cqe_event_unset(priv);
-	return -1;
 }
 
 void
 mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv)
 {
-	int retries = MLX5_VDPA_INTR_RETRIES;
-	int ret = -EAGAIN;
 	void *status;
 
-	if (priv->intr_handle.fd) {
-		while (retries-- && ret == -EAGAIN) {
-			ret = rte_intr_callback_unregister(&priv->intr_handle,
-						    mlx5_vdpa_interrupt_handler,
-						    priv);
-			if (ret == -EAGAIN) {
-				DRV_LOG(DEBUG, "Try again to unregister fd %d "
-					"of CQ interrupt, retries = %d.",
-					priv->intr_handle.fd, retries);
-				rte_pause();
-			}
-		}
-		memset(&priv->intr_handle, 0, sizeof(priv->intr_handle));
-	}
 	if (priv->timer_tid) {
 		pthread_cancel(priv->timer_tid);
 		pthread_join(priv->timer_tid, &status);
-- 
2.25.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH] vdpa/mlx5: improve interrupt management
  2021-04-29  9:48     ` Maxime Coquelin
@ 2021-05-02 10:47       ` Matan Azrad
  0 siblings, 0 replies; 9+ messages in thread
From: Matan Azrad @ 2021-05-02 10:47 UTC (permalink / raw)
  To: Maxime Coquelin, NBU-Contact-Thomas Monjalon
  Cc: dev, Xia, Chenbo, ferruh.yigit, david.marchand



From: Maxime Coquelin 
> Hi Matan,
> 
> On 4/11/21 11:07 AM, Thomas Monjalon wrote:
> > 07/04/2021 08:49, Xia, Chenbo:
> >>> Signed-off-by: Matan Azrad <matan@nvidia.com>
> >>> Acked-by: Xueming Li <xuemingl@nvidia.com>
> >>
> >> Patch applied to next-virtio/main with conflict resolved.
> >>
> >> Thanks!
> >
> > Sorry it cannot be pulled in the main tree because it breaks
> > compilation with musl libc.
> >
> > It can be fixed in the same way as in
> > 41b5a7a8494e ("vdpa/mlx5: replace pthread functions unavailable in
> > musl") The function pthread_attr_setaffinity_np() must be replaced
> > with
> > pthread_setaffinity_np() if moved after the call to pthread_create().
> >
> > We could add a checkpatch check for forbidden functions like
> >       pthread_yield, pthread_attr_setaffinity_np and forbidden
> > includes:
> >       <error.h>, <sys/fcntl.h>
> >
> >
> 
> Could you please send a new version taking care of Thomas' request.
> 
> Thanks,
> Maxime

Done.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH v2] vdpa/mlx5: improve interrupt management
  2021-05-02 10:45 ` [dpdk-dev] [PATCH v2] " Matan Azrad
@ 2021-05-03 14:27   ` Maxime Coquelin
  2021-05-04  8:29   ` Maxime Coquelin
  1 sibling, 0 replies; 9+ messages in thread
From: Maxime Coquelin @ 2021-05-03 14:27 UTC (permalink / raw)
  To: Matan Azrad, dev; +Cc: thomas, chenbo.xia, Xueming Li



On 5/2/21 12:45 PM, Matan Azrad wrote:
> The driver should notify the guest for each traffic burst detected by CQ
> polling.
> 
> The CQ polling trigger is defined by `event_mode` device argument,
> either by busy polling on all the CQs or by blocked call to HW
> completion event using DevX channel.
> 
> Also, the polling event modes can move to blocked call when the
> traffic rate is low.
> 
> The current blocked call uses the EAL interrupt API suffering a lot
> of overhead in the API management and serve all the drivers and
> libraries using only single thread.
> 
> Use blocking FD of the DevX channel in order to do blocked call
> directly by the DevX channel FD mechanism.
> 
> Signed-off-by: Matan Azrad <matan@nvidia.com>
> Acked-by: Xueming Li <xuemingl@nvidia.com>
> ---
>  doc/guides/vdpadevs/mlx5.rst        |   8 +-
>  drivers/vdpa/mlx5/mlx5_vdpa.c       |   8 +-
>  drivers/vdpa/mlx5/mlx5_vdpa.h       |   8 +-
>  drivers/vdpa/mlx5/mlx5_vdpa_event.c | 304 +++++++++++-----------------
>  4 files changed, 130 insertions(+), 198 deletions(-)
> 
> v2:
> Rebased on top of latest updates by Thomas.
> 
> 

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH v2] vdpa/mlx5: improve interrupt management
  2021-05-02 10:45 ` [dpdk-dev] [PATCH v2] " Matan Azrad
  2021-05-03 14:27   ` Maxime Coquelin
@ 2021-05-04  8:29   ` Maxime Coquelin
  1 sibling, 0 replies; 9+ messages in thread
From: Maxime Coquelin @ 2021-05-04  8:29 UTC (permalink / raw)
  To: Matan Azrad, dev; +Cc: thomas, chenbo.xia, Xueming Li



On 5/2/21 12:45 PM, Matan Azrad wrote:
> The driver should notify the guest for each traffic burst detected by CQ
> polling.
> 
> The CQ polling trigger is defined by `event_mode` device argument,
> either by busy polling on all the CQs or by blocked call to HW
> completion event using DevX channel.
> 
> Also, the polling event modes can move to blocked call when the
> traffic rate is low.
> 
> The current blocked call uses the EAL interrupt API suffering a lot
> of overhead in the API management and serve all the drivers and
> libraries using only single thread.
> 
> Use blocking FD of the DevX channel in order to do blocked call
> directly by the DevX channel FD mechanism.
> 
> Signed-off-by: Matan Azrad <matan@nvidia.com>
> Acked-by: Xueming Li <xuemingl@nvidia.com>
> ---
>  doc/guides/vdpadevs/mlx5.rst        |   8 +-
>  drivers/vdpa/mlx5/mlx5_vdpa.c       |   8 +-
>  drivers/vdpa/mlx5/mlx5_vdpa.h       |   8 +-
>  drivers/vdpa/mlx5/mlx5_vdpa_event.c | 304 +++++++++++-----------------
>  4 files changed, 130 insertions(+), 198 deletions(-)
> 
> v2:
> Rebased on top of latest updates by Thomas.
> 


Applied to dpdk-next-virtio/main.

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-05-04  8:29 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-18  9:13 [dpdk-dev] [PATCH] vdpa/mlx5: improve interrupt management Matan Azrad
2021-03-29 14:09 ` Maxime Coquelin
2021-04-07  6:49 ` Xia, Chenbo
2021-04-11  9:07   ` Thomas Monjalon
2021-04-29  9:48     ` Maxime Coquelin
2021-05-02 10:47       ` Matan Azrad
2021-05-02 10:45 ` [dpdk-dev] [PATCH v2] " Matan Azrad
2021-05-03 14:27   ` Maxime Coquelin
2021-05-04  8:29   ` Maxime Coquelin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).