patches for DPDK stable branches
 help / color / mirror / Atom feed
* [PATCH 1/7] vdpa/mlx5: fix interrupt trash that leads to segment fault
       [not found] <20220224132820.1939650-1-xuemingl@nvidia.com>
@ 2022-02-24 13:28 ` Xueming Li
  2022-02-24 13:28 ` [PATCH 2/7] vdpa/mlx5: fix dead loop when process interrupted Xueming Li
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Xueming Li @ 2022-02-24 13:28 UTC (permalink / raw)
  To: dev
  Cc: xuemingl, matan, stable, Matan Azrad, Viacheslav Ovsiienko,
	Maxime Coquelin

Disable interrupt unregister timeout to avoid invalid FD caused
interrupt thread segment fault.

Fixes: 62c813706e41 ("vdpa/mlx5: map doorbell")
Cc: matan@mellanox.com
Cc: stable@dpdk.org

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
---
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
index 3416797d289..de324506cb9 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
@@ -17,7 +17,7 @@
 
 
 static void
-mlx5_vdpa_virtq_handler(void *cb_arg)
+mlx5_vdpa_virtq_kick_handler(void *cb_arg)
 {
 	struct mlx5_vdpa_virtq *virtq = cb_arg;
 	struct mlx5_vdpa_priv *priv = virtq->priv;
@@ -59,20 +59,16 @@ static int
 mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq)
 {
 	unsigned int i;
-	int retries = MLX5_VDPA_INTR_RETRIES;
 	int ret = -EAGAIN;
 
-	if (rte_intr_fd_get(virtq->intr_handle) != -1) {
-		while (retries-- && ret == -EAGAIN) {
+	if (rte_intr_fd_get(virtq->intr_handle) >= 0) {
+		while (ret == -EAGAIN) {
 			ret = rte_intr_callback_unregister(virtq->intr_handle,
-							mlx5_vdpa_virtq_handler,
-							virtq);
+					mlx5_vdpa_virtq_kick_handler, virtq);
 			if (ret == -EAGAIN) {
-				DRV_LOG(DEBUG, "Try again to unregister fd %d "
-				"of virtq %d interrupt, retries = %d.",
-				rte_intr_fd_get(virtq->intr_handle),
-				(int)virtq->index, retries);
-
+				DRV_LOG(DEBUG, "Try again to unregister fd %d of virtq %hu interrupt",
+					rte_intr_fd_get(virtq->intr_handle),
+					(int)virtq->index);
 				usleep(MLX5_VDPA_INTR_RETRIES_USEC);
 			}
 		}
@@ -359,7 +355,7 @@ mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index)
 			goto error;
 
 		if (rte_intr_callback_register(virtq->intr_handle,
-					       mlx5_vdpa_virtq_handler,
+					       mlx5_vdpa_virtq_kick_handler,
 					       virtq)) {
 			rte_intr_fd_set(virtq->intr_handle, -1);
 			DRV_LOG(ERR, "Failed to register virtq %d interrupt.",
-- 
2.35.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 2/7] vdpa/mlx5: fix dead loop when process interrupted
       [not found] <20220224132820.1939650-1-xuemingl@nvidia.com>
  2022-02-24 13:28 ` [PATCH 1/7] vdpa/mlx5: fix interrupt trash that leads to segment fault Xueming Li
@ 2022-02-24 13:28 ` Xueming Li
       [not found] ` <20220224143809.1977642-1-xuemingl@nvidia.com>
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Xueming Li @ 2022-02-24 13:28 UTC (permalink / raw)
  To: dev; +Cc: xuemingl, stable, Matan Azrad, Viacheslav Ovsiienko, Maxime Coquelin

In Ctrl+C handling, sometimes kick handling thread gets endless EGAIN
error and fall into dead lock.

Kick happens frequently in real system due to busy traffic or retry
mechanism. This patch simplifies kick firmware anyway and skip setting
hardware notifier due to potential device error, notifier could be set
in next successful kick request.

Fixes: 62c813706e41 ("vdpa/mlx5: map doorbell")
Cc: stable@dpdk.org

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
---
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
index de324506cb9..e1e05924a40 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
@@ -23,11 +23,11 @@ mlx5_vdpa_virtq_kick_handler(void *cb_arg)
 	struct mlx5_vdpa_priv *priv = virtq->priv;
 	uint64_t buf;
 	int nbytes;
+	int retry;
 
 	if (rte_intr_fd_get(virtq->intr_handle) < 0)
 		return;
-
-	do {
+	for (retry = 0; retry < 3; ++retry) {
 		nbytes = read(rte_intr_fd_get(virtq->intr_handle), &buf,
 			      8);
 		if (nbytes < 0) {
@@ -39,7 +39,9 @@ mlx5_vdpa_virtq_kick_handler(void *cb_arg)
 				virtq->index, strerror(errno));
 		}
 		break;
-	} while (1);
+	}
+	if (nbytes < 0)
+		return;
 	rte_write32(virtq->index, priv->virtq_db_addr);
 	if (virtq->notifier_state == MLX5_VDPA_NOTIFIER_STATE_DISABLED) {
 		if (rte_vhost_host_notifier_ctrl(priv->vid, virtq->index, true))
-- 
2.35.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v1 1/7] vdpa/mlx5: fix interrupt trash that leads to segment fault
       [not found] ` <20220224143809.1977642-1-xuemingl@nvidia.com>
@ 2022-02-24 14:38   ` Xueming Li
  2022-02-24 14:38   ` [PATCH v1 2/7] vdpa/mlx5: fix dead loop when process interrupted Xueming Li
  1 sibling, 0 replies; 10+ messages in thread
From: Xueming Li @ 2022-02-24 14:38 UTC (permalink / raw)
  To: dev
  Cc: xuemingl, matan, stable, Matan Azrad, Viacheslav Ovsiienko,
	Maxime Coquelin

Disable interrupt unregister timeout to avoid invalid FD caused
interrupt thread segment fault.

Fixes: 62c813706e41 ("vdpa/mlx5: map doorbell")
Cc: matan@mellanox.com
Cc: stable@dpdk.org

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
---
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
index 3416797d289..de324506cb9 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
@@ -17,7 +17,7 @@
 
 
 static void
-mlx5_vdpa_virtq_handler(void *cb_arg)
+mlx5_vdpa_virtq_kick_handler(void *cb_arg)
 {
 	struct mlx5_vdpa_virtq *virtq = cb_arg;
 	struct mlx5_vdpa_priv *priv = virtq->priv;
@@ -59,20 +59,16 @@ static int
 mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq)
 {
 	unsigned int i;
-	int retries = MLX5_VDPA_INTR_RETRIES;
 	int ret = -EAGAIN;
 
-	if (rte_intr_fd_get(virtq->intr_handle) != -1) {
-		while (retries-- && ret == -EAGAIN) {
+	if (rte_intr_fd_get(virtq->intr_handle) >= 0) {
+		while (ret == -EAGAIN) {
 			ret = rte_intr_callback_unregister(virtq->intr_handle,
-							mlx5_vdpa_virtq_handler,
-							virtq);
+					mlx5_vdpa_virtq_kick_handler, virtq);
 			if (ret == -EAGAIN) {
-				DRV_LOG(DEBUG, "Try again to unregister fd %d "
-				"of virtq %d interrupt, retries = %d.",
-				rte_intr_fd_get(virtq->intr_handle),
-				(int)virtq->index, retries);
-
+				DRV_LOG(DEBUG, "Try again to unregister fd %d of virtq %hu interrupt",
+					rte_intr_fd_get(virtq->intr_handle),
+					(int)virtq->index);
 				usleep(MLX5_VDPA_INTR_RETRIES_USEC);
 			}
 		}
@@ -359,7 +355,7 @@ mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index)
 			goto error;
 
 		if (rte_intr_callback_register(virtq->intr_handle,
-					       mlx5_vdpa_virtq_handler,
+					       mlx5_vdpa_virtq_kick_handler,
 					       virtq)) {
 			rte_intr_fd_set(virtq->intr_handle, -1);
 			DRV_LOG(ERR, "Failed to register virtq %d interrupt.",
-- 
2.35.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v1 2/7] vdpa/mlx5: fix dead loop when process interrupted
       [not found] ` <20220224143809.1977642-1-xuemingl@nvidia.com>
  2022-02-24 14:38   ` [PATCH v1 1/7] vdpa/mlx5: fix interrupt trash that leads to segment fault Xueming Li
@ 2022-02-24 14:38   ` Xueming Li
  1 sibling, 0 replies; 10+ messages in thread
From: Xueming Li @ 2022-02-24 14:38 UTC (permalink / raw)
  To: dev; +Cc: xuemingl, stable, Matan Azrad, Viacheslav Ovsiienko, Maxime Coquelin

In Ctrl+C handling, sometimes kick handling thread gets endless EGAIN
error and fall into dead lock.

Kick happens frequently in real system due to busy traffic or retry
mechanism. This patch simplifies kick firmware anyway and skip setting
hardware notifier due to potential device error, notifier could be set
in next successful kick request.

Fixes: 62c813706e41 ("vdpa/mlx5: map doorbell")
Cc: stable@dpdk.org

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
---
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
index de324506cb9..e1e05924a40 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
@@ -23,11 +23,11 @@ mlx5_vdpa_virtq_kick_handler(void *cb_arg)
 	struct mlx5_vdpa_priv *priv = virtq->priv;
 	uint64_t buf;
 	int nbytes;
+	int retry;
 
 	if (rte_intr_fd_get(virtq->intr_handle) < 0)
 		return;
-
-	do {
+	for (retry = 0; retry < 3; ++retry) {
 		nbytes = read(rte_intr_fd_get(virtq->intr_handle), &buf,
 			      8);
 		if (nbytes < 0) {
@@ -39,7 +39,9 @@ mlx5_vdpa_virtq_kick_handler(void *cb_arg)
 				virtq->index, strerror(errno));
 		}
 		break;
-	} while (1);
+	}
+	if (nbytes < 0)
+		return;
 	rte_write32(virtq->index, priv->virtq_db_addr);
 	if (virtq->notifier_state == MLX5_VDPA_NOTIFIER_STATE_DISABLED) {
 		if (rte_vhost_host_notifier_ctrl(priv->vid, virtq->index, true))
-- 
2.35.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2 1/7] vdpa/mlx5: fix interrupt trash that leads to segment fault
       [not found] ` <20220224155101.1991626-1-xuemingl@nvidia.com>
@ 2022-02-24 15:50   ` Xueming Li
  2022-04-20 10:39     ` Maxime Coquelin
  2022-02-24 15:50   ` [PATCH v2 2/7] vdpa/mlx5: fix dead loop when process interrupted Xueming Li
  1 sibling, 1 reply; 10+ messages in thread
From: Xueming Li @ 2022-02-24 15:50 UTC (permalink / raw)
  To: dev
  Cc: xuemingl, matan, stable, Matan Azrad, Viacheslav Ovsiienko,
	Maxime Coquelin

Disable interrupt unregister timeout to avoid invalid FD caused
interrupt thread segment fault.

Fixes: 62c813706e41 ("vdpa/mlx5: map doorbell")
Cc: matan@mellanox.com
Cc: stable@dpdk.org

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
---
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
index 3416797d289..de324506cb9 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
@@ -17,7 +17,7 @@
 
 
 static void
-mlx5_vdpa_virtq_handler(void *cb_arg)
+mlx5_vdpa_virtq_kick_handler(void *cb_arg)
 {
 	struct mlx5_vdpa_virtq *virtq = cb_arg;
 	struct mlx5_vdpa_priv *priv = virtq->priv;
@@ -59,20 +59,16 @@ static int
 mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq)
 {
 	unsigned int i;
-	int retries = MLX5_VDPA_INTR_RETRIES;
 	int ret = -EAGAIN;
 
-	if (rte_intr_fd_get(virtq->intr_handle) != -1) {
-		while (retries-- && ret == -EAGAIN) {
+	if (rte_intr_fd_get(virtq->intr_handle) >= 0) {
+		while (ret == -EAGAIN) {
 			ret = rte_intr_callback_unregister(virtq->intr_handle,
-							mlx5_vdpa_virtq_handler,
-							virtq);
+					mlx5_vdpa_virtq_kick_handler, virtq);
 			if (ret == -EAGAIN) {
-				DRV_LOG(DEBUG, "Try again to unregister fd %d "
-				"of virtq %d interrupt, retries = %d.",
-				rte_intr_fd_get(virtq->intr_handle),
-				(int)virtq->index, retries);
-
+				DRV_LOG(DEBUG, "Try again to unregister fd %d of virtq %hu interrupt",
+					rte_intr_fd_get(virtq->intr_handle),
+					(int)virtq->index);
 				usleep(MLX5_VDPA_INTR_RETRIES_USEC);
 			}
 		}
@@ -359,7 +355,7 @@ mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index)
 			goto error;
 
 		if (rte_intr_callback_register(virtq->intr_handle,
-					       mlx5_vdpa_virtq_handler,
+					       mlx5_vdpa_virtq_kick_handler,
 					       virtq)) {
 			rte_intr_fd_set(virtq->intr_handle, -1);
 			DRV_LOG(ERR, "Failed to register virtq %d interrupt.",
-- 
2.35.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2 2/7] vdpa/mlx5: fix dead loop when process interrupted
       [not found] ` <20220224155101.1991626-1-xuemingl@nvidia.com>
  2022-02-24 15:50   ` [PATCH v2 1/7] vdpa/mlx5: fix interrupt trash that leads to segment fault Xueming Li
@ 2022-02-24 15:50   ` Xueming Li
  2022-04-20 10:33     ` Maxime Coquelin
  1 sibling, 1 reply; 10+ messages in thread
From: Xueming Li @ 2022-02-24 15:50 UTC (permalink / raw)
  To: dev; +Cc: xuemingl, stable, Matan Azrad, Viacheslav Ovsiienko, Maxime Coquelin

In Ctrl+C handling, sometimes kick handling thread gets endless EGAIN
error and fall into dead lock.

Kick happens frequently in real system due to busy traffic or retry
mechanism. This patch simplifies kick firmware anyway and skip setting
hardware notifier due to potential device error, notifier could be set
in next successful kick request.

Fixes: 62c813706e41 ("vdpa/mlx5: map doorbell")
Cc: stable@dpdk.org

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
---
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
index de324506cb9..e1e05924a40 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
@@ -23,11 +23,11 @@ mlx5_vdpa_virtq_kick_handler(void *cb_arg)
 	struct mlx5_vdpa_priv *priv = virtq->priv;
 	uint64_t buf;
 	int nbytes;
+	int retry;
 
 	if (rte_intr_fd_get(virtq->intr_handle) < 0)
 		return;
-
-	do {
+	for (retry = 0; retry < 3; ++retry) {
 		nbytes = read(rte_intr_fd_get(virtq->intr_handle), &buf,
 			      8);
 		if (nbytes < 0) {
@@ -39,7 +39,9 @@ mlx5_vdpa_virtq_kick_handler(void *cb_arg)
 				virtq->index, strerror(errno));
 		}
 		break;
-	} while (1);
+	}
+	if (nbytes < 0)
+		return;
 	rte_write32(virtq->index, priv->virtq_db_addr);
 	if (virtq->notifier_state == MLX5_VDPA_NOTIFIER_STATE_DISABLED) {
 		if (rte_vhost_host_notifier_ctrl(priv->vid, virtq->index, true))
-- 
2.35.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/7] vdpa/mlx5: fix dead loop when process interrupted
  2022-02-24 15:50   ` [PATCH v2 2/7] vdpa/mlx5: fix dead loop when process interrupted Xueming Li
@ 2022-04-20 10:33     ` Maxime Coquelin
  0 siblings, 0 replies; 10+ messages in thread
From: Maxime Coquelin @ 2022-04-20 10:33 UTC (permalink / raw)
  To: Xueming Li, dev; +Cc: stable, Matan Azrad, Viacheslav Ovsiienko



On 2/24/22 16:50, Xueming Li wrote:
> In Ctrl+C handling, sometimes kick handling thread gets endless EGAIN
> error and fall into dead lock.
> 
> Kick happens frequently in real system due to busy traffic or retry
> mechanism. This patch simplifies kick firmware anyway and skip setting
> hardware notifier due to potential device error, notifier could be set
> in next successful kick request.
> 
> Fixes: 62c813706e41 ("vdpa/mlx5: map doorbell")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Xueming Li <xuemingl@nvidia.com>
> ---
>   drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 8 +++++---
>   1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
> index de324506cb9..e1e05924a40 100644
> --- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
> +++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
> @@ -23,11 +23,11 @@ mlx5_vdpa_virtq_kick_handler(void *cb_arg)
>   	struct mlx5_vdpa_priv *priv = virtq->priv;
>   	uint64_t buf;
>   	int nbytes;
> +	int retry;
>   
>   	if (rte_intr_fd_get(virtq->intr_handle) < 0)
>   		return;
> -
> -	do {
> +	for (retry = 0; retry < 3; ++retry) {
>   		nbytes = read(rte_intr_fd_get(virtq->intr_handle), &buf,
>   			      8);
>   		if (nbytes < 0) {
> @@ -39,7 +39,9 @@ mlx5_vdpa_virtq_kick_handler(void *cb_arg)
>   				virtq->index, strerror(errno));
>   		}
>   		break;
> -	} while (1);
> +	}
> +	if (nbytes < 0)
> +		return;
>   	rte_write32(virtq->index, priv->virtq_db_addr);
>   	if (virtq->notifier_state == MLX5_VDPA_NOTIFIER_STATE_DISABLED) {
>   		if (rte_vhost_host_notifier_ctrl(priv->vid, virtq->index, true))

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/7] vdpa/mlx5: fix interrupt trash that leads to segment fault
  2022-02-24 15:50   ` [PATCH v2 1/7] vdpa/mlx5: fix interrupt trash that leads to segment fault Xueming Li
@ 2022-04-20 10:39     ` Maxime Coquelin
  0 siblings, 0 replies; 10+ messages in thread
From: Maxime Coquelin @ 2022-04-20 10:39 UTC (permalink / raw)
  To: Xueming Li, dev; +Cc: matan, stable, Matan Azrad, Viacheslav Ovsiienko



On 2/24/22 16:50, Xueming Li wrote:
> Disable interrupt unregister timeout to avoid invalid FD caused
> interrupt thread segment fault.
> 
> Fixes: 62c813706e41 ("vdpa/mlx5: map doorbell")
> Cc: matan@mellanox.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Xueming Li <xuemingl@nvidia.com>
> ---
>   drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 20 ++++++++------------
>   1 file changed, 8 insertions(+), 12 deletions(-)
> 

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v3 1/7] vdpa/mlx5: fix interrupt trash that leads to segment fault
       [not found] ` <20220508142554.560354-1-xuemingl@nvidia.com>
@ 2022-05-08 14:25   ` Xueming Li
  2022-05-08 14:25   ` [PATCH v3 2/7] vdpa/mlx5: fix dead loop when process interrupted Xueming Li
  1 sibling, 0 replies; 10+ messages in thread
From: Xueming Li @ 2022-05-08 14:25 UTC (permalink / raw)
  To: dev, Maxime Coquelin; +Cc: xuemingl, matan, stable

Disable interrupt unregister timeout to avoid invalid FD caused
interrupt thread segment fault.

Fixes: 62c813706e41 ("vdpa/mlx5: map doorbell")
Cc: matan@mellanox.com
Cc: stable@dpdk.org

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
index 3416797d289..2e517beda24 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
@@ -17,7 +17,7 @@
 
 
 static void
-mlx5_vdpa_virtq_handler(void *cb_arg)
+mlx5_vdpa_virtq_kick_handler(void *cb_arg)
 {
 	struct mlx5_vdpa_virtq *virtq = cb_arg;
 	struct mlx5_vdpa_priv *priv = virtq->priv;
@@ -59,20 +59,16 @@ static int
 mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq)
 {
 	unsigned int i;
-	int retries = MLX5_VDPA_INTR_RETRIES;
 	int ret = -EAGAIN;
 
-	if (rte_intr_fd_get(virtq->intr_handle) != -1) {
-		while (retries-- && ret == -EAGAIN) {
+	if (rte_intr_fd_get(virtq->intr_handle) >= 0) {
+		while (ret == -EAGAIN) {
 			ret = rte_intr_callback_unregister(virtq->intr_handle,
-							mlx5_vdpa_virtq_handler,
-							virtq);
+					mlx5_vdpa_virtq_kick_handler, virtq);
 			if (ret == -EAGAIN) {
-				DRV_LOG(DEBUG, "Try again to unregister fd %d "
-				"of virtq %d interrupt, retries = %d.",
-				rte_intr_fd_get(virtq->intr_handle),
-				(int)virtq->index, retries);
-
+				DRV_LOG(DEBUG, "Try again to unregister fd %d of virtq %hu interrupt",
+					rte_intr_fd_get(virtq->intr_handle),
+					virtq->index);
 				usleep(MLX5_VDPA_INTR_RETRIES_USEC);
 			}
 		}
@@ -359,7 +355,7 @@ mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index)
 			goto error;
 
 		if (rte_intr_callback_register(virtq->intr_handle,
-					       mlx5_vdpa_virtq_handler,
+					       mlx5_vdpa_virtq_kick_handler,
 					       virtq)) {
 			rte_intr_fd_set(virtq->intr_handle, -1);
 			DRV_LOG(ERR, "Failed to register virtq %d interrupt.",
-- 
2.35.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v3 2/7] vdpa/mlx5: fix dead loop when process interrupted
       [not found] ` <20220508142554.560354-1-xuemingl@nvidia.com>
  2022-05-08 14:25   ` [PATCH v3 1/7] vdpa/mlx5: fix interrupt trash that leads to segment fault Xueming Li
@ 2022-05-08 14:25   ` Xueming Li
  1 sibling, 0 replies; 10+ messages in thread
From: Xueming Li @ 2022-05-08 14:25 UTC (permalink / raw)
  To: dev, Maxime Coquelin; +Cc: xuemingl, stable

In Ctrl+C handling, sometimes kick handling thread gets endless EGAIN
error and fall into dead lock.

Kick happens frequently in real system due to busy traffic or retry
mechanism. This patch simplifies kick firmware anyway and skip setting
hardware notifier due to potential device error, notifier could be set
in next successful kick request.

Fixes: 62c813706e41 ("vdpa/mlx5: map doorbell")
Cc: stable@dpdk.org

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
index 2e517beda24..2696d54b412 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
@@ -23,11 +23,11 @@ mlx5_vdpa_virtq_kick_handler(void *cb_arg)
 	struct mlx5_vdpa_priv *priv = virtq->priv;
 	uint64_t buf;
 	int nbytes;
+	int retry;
 
 	if (rte_intr_fd_get(virtq->intr_handle) < 0)
 		return;
-
-	do {
+	for (retry = 0; retry < 3; ++retry) {
 		nbytes = read(rte_intr_fd_get(virtq->intr_handle), &buf,
 			      8);
 		if (nbytes < 0) {
@@ -39,7 +39,9 @@ mlx5_vdpa_virtq_kick_handler(void *cb_arg)
 				virtq->index, strerror(errno));
 		}
 		break;
-	} while (1);
+	}
+	if (nbytes < 0)
+		return;
 	rte_write32(virtq->index, priv->virtq_db_addr);
 	if (virtq->notifier_state == MLX5_VDPA_NOTIFIER_STATE_DISABLED) {
 		if (rte_vhost_host_notifier_ctrl(priv->vid, virtq->index, true))
-- 
2.35.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2022-05-08 14:26 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20220224132820.1939650-1-xuemingl@nvidia.com>
2022-02-24 13:28 ` [PATCH 1/7] vdpa/mlx5: fix interrupt trash that leads to segment fault Xueming Li
2022-02-24 13:28 ` [PATCH 2/7] vdpa/mlx5: fix dead loop when process interrupted Xueming Li
     [not found] ` <20220224143809.1977642-1-xuemingl@nvidia.com>
2022-02-24 14:38   ` [PATCH v1 1/7] vdpa/mlx5: fix interrupt trash that leads to segment fault Xueming Li
2022-02-24 14:38   ` [PATCH v1 2/7] vdpa/mlx5: fix dead loop when process interrupted Xueming Li
     [not found] ` <20220224155101.1991626-1-xuemingl@nvidia.com>
2022-02-24 15:50   ` [PATCH v2 1/7] vdpa/mlx5: fix interrupt trash that leads to segment fault Xueming Li
2022-04-20 10:39     ` Maxime Coquelin
2022-02-24 15:50   ` [PATCH v2 2/7] vdpa/mlx5: fix dead loop when process interrupted Xueming Li
2022-04-20 10:33     ` Maxime Coquelin
     [not found] ` <20220508142554.560354-1-xuemingl@nvidia.com>
2022-05-08 14:25   ` [PATCH v3 1/7] vdpa/mlx5: fix interrupt trash that leads to segment fault Xueming Li
2022-05-08 14:25   ` [PATCH v3 2/7] vdpa/mlx5: fix dead loop when process interrupted Xueming Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).