patches for DPDK stable branches
 help / color / mirror / Atom feed
* [dpdk-stable] [PATCH] net/mlx5: reduce txq completion index memory loads
@ 2020-03-16 15:34 Alexander Kozyrev
  2020-03-17 13:10 ` Raslan Darawsheh
  0 siblings, 1 reply; 2+ messages in thread
From: Alexander Kozyrev @ 2020-03-16 15:34 UTC (permalink / raw)
  To: dev; +Cc: rasland, matan, viacheslavo, stable

The mlx5_tx_handle_completion() function uses a non-optimal check to
decide whether the doorbell needs to be rung. Advancing a copy of the
txq consumer index and comparing this copy with the initial value causes
unnecessary memory loads and hurts performance. It is better to use a
simple boolean variable for this purpose. That eliminates all the
excessive memory operations on the txq consumer index and restores the
performance of Tx completions.

Fixes: 1fd9af0 ("net/mlx5: update Tx error handling routine")
Cc: stable@dpdk.org

Signed-off-by: Alexander Kozyrev <akozyrev@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 5ac63da..f3bf763 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -2160,7 +2160,7 @@ enum mlx5_txcmp_code {
 {
 	unsigned int count = MLX5_TX_COMP_MAX_CQE;
 	volatile struct mlx5_cqe *last_cqe = NULL;
-	uint16_t ci = txq->cq_ci;
+	bool ring_doorbell = false;
 	int ret;
 
 	static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value");
@@ -2168,8 +2168,8 @@ enum mlx5_txcmp_code {
 	do {
 		volatile struct mlx5_cqe *cqe;
 
-		cqe = &txq->cqes[ci & txq->cqe_m];
-		ret = check_cqe(cqe, txq->cqe_s, ci);
+		cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
+		ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
 			if (likely(ret != MLX5_CQE_STATUS_ERR)) {
 				/* No new CQEs in completion queue. */
@@ -2183,7 +2183,6 @@ enum mlx5_txcmp_code {
 			 * here, before we might perform SQ reset.
 			 */
 			rte_wmb();
-			txq->cq_ci = ci;
 			ret = mlx5_tx_error_cqe_handle
 				(txq, (volatile struct mlx5_err_cqe *)cqe);
 			if (unlikely(ret < 0)) {
@@ -2199,16 +2198,18 @@ enum mlx5_txcmp_code {
 			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
 			 * The send queue is supposed to be empty.
 			 */
-			++ci;
-			txq->cq_pi = ci;
+			ring_doorbell = true;
+			++txq->cq_ci;
+			txq->cq_pi = txq->cq_ci;
 			last_cqe = NULL;
 			continue;
 		}
 		/* Normal transmit completion. */
-		MLX5_ASSERT(ci != txq->cq_pi);
-		MLX5_ASSERT((txq->fcqs[ci & txq->cqe_m] >> 16) ==
+		MLX5_ASSERT(txq->cq_ci != txq->cq_pi);
+		MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) ==
 			    cqe->wqe_counter);
-		++ci;
+		ring_doorbell = true;
+		++txq->cq_ci;
 		last_cqe = cqe;
 		/*
 		 * We have to restrict the amount of processed CQEs
@@ -2221,14 +2222,10 @@ enum mlx5_txcmp_code {
 		if (likely(--count == 0))
 			break;
 	} while (true);
-	if (likely(ci != txq->cq_ci)) {
-		/*
-		 * Update completion queue consuming index
-		 * and ring doorbell to notify hardware.
-		 */
+	if (likely(ring_doorbell)) {
+		/* Ring doorbell to notify hardware. */
 		rte_compiler_barrier();
-		txq->cq_ci = ci;
-		*txq->cq_db = rte_cpu_to_be_32(ci);
+		*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
 		mlx5_tx_comp_flush(txq, last_cqe, olx);
 	}
 }
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [dpdk-stable] [PATCH] net/mlx5: reduce txq completion index memory loads
  2020-03-16 15:34 [dpdk-stable] [PATCH] net/mlx5: reduce txq completion index memory loads Alexander Kozyrev
@ 2020-03-17 13:10 ` Raslan Darawsheh
  0 siblings, 0 replies; 2+ messages in thread
From: Raslan Darawsheh @ 2020-03-17 13:10 UTC (permalink / raw)
  To: Alexander Kozyrev, dev; +Cc: Matan Azrad, Slava Ovsiienko, stable

Hi,

> -----Original Message-----
> From: Alexander Kozyrev <akozyrev@mellanox.com>
> Sent: Monday, March 16, 2020 5:35 PM
> To: dev@dpdk.org
> Cc: Raslan Darawsheh <rasland@mellanox.com>; Matan Azrad
> <matan@mellanox.com>; Slava Ovsiienko <viacheslavo@mellanox.com>;
> stable@dpdk.org
> Subject: [PATCH] net/mlx5: reduce txq completion index memory loads
> 
> The mlx5_tx_handle_completion() function uses a non-optimal check to
> decide whether the doorbell needs to be rung. Advancing a copy of the
> txq consumer index and comparing this copy with the initial value causes
> unnecessary memory loads and hurts performance. It is better to use a
> simple boolean variable for this purpose. That eliminates all the
> excessive memory operations on the txq consumer index and restores the
> performance of Tx completions.
> 
> Fixes: 1fd9af0 ("net/mlx5: update Tx error handling routine")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Alexander Kozyrev <akozyrev@mellanox.com>
> Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5_rxtx.c | 29 +++++++++++++----------------
>  1 file changed, 13 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index 5ac63da..f3bf763 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -2160,7 +2160,7 @@ enum mlx5_txcmp_code {
>  {
>  	unsigned int count = MLX5_TX_COMP_MAX_CQE;
>  	volatile struct mlx5_cqe *last_cqe = NULL;
> -	uint16_t ci = txq->cq_ci;
> +	bool ring_doorbell = false;
>  	int ret;
> 
>  	static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative
> value");
> @@ -2168,8 +2168,8 @@ enum mlx5_txcmp_code {
>  	do {
>  		volatile struct mlx5_cqe *cqe;
> 
> -		cqe = &txq->cqes[ci & txq->cqe_m];
> -		ret = check_cqe(cqe, txq->cqe_s, ci);
> +		cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
> +		ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
>  		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
>  			if (likely(ret != MLX5_CQE_STATUS_ERR)) {
>  				/* No new CQEs in completion queue. */
> @@ -2183,7 +2183,6 @@ enum mlx5_txcmp_code {
>  			 * here, before we might perform SQ reset.
>  			 */
>  			rte_wmb();
> -			txq->cq_ci = ci;
>  			ret = mlx5_tx_error_cqe_handle
>  				(txq, (volatile struct mlx5_err_cqe *)cqe);
>  			if (unlikely(ret < 0)) {
> @@ -2199,16 +2198,18 @@ enum mlx5_txcmp_code {
>  			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
>  			 * The send queue is supposed to be empty.
>  			 */
> -			++ci;
> -			txq->cq_pi = ci;
> +			ring_doorbell = true;
> +			++txq->cq_ci;
> +			txq->cq_pi = txq->cq_ci;
>  			last_cqe = NULL;
>  			continue;
>  		}
>  		/* Normal transmit completion. */
> -		MLX5_ASSERT(ci != txq->cq_pi);
> -		MLX5_ASSERT((txq->fcqs[ci & txq->cqe_m] >> 16) ==
> +		MLX5_ASSERT(txq->cq_ci != txq->cq_pi);
> +		MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16)
> ==
>  			    cqe->wqe_counter);
> -		++ci;
> +		ring_doorbell = true;
> +		++txq->cq_ci;
>  		last_cqe = cqe;
>  		/*
>  		 * We have to restrict the amount of processed CQEs
> @@ -2221,14 +2222,10 @@ enum mlx5_txcmp_code {
>  		if (likely(--count == 0))
>  			break;
>  	} while (true);
> -	if (likely(ci != txq->cq_ci)) {
> -		/*
> -		 * Update completion queue consuming index
> -		 * and ring doorbell to notify hardware.
> -		 */
> +	if (likely(ring_doorbell)) {
> +		/* Ring doorbell to notify hardware. */
>  		rte_compiler_barrier();
> -		txq->cq_ci = ci;
> -		*txq->cq_db = rte_cpu_to_be_32(ci);
> +		*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
>  		mlx5_tx_comp_flush(txq, last_cqe, olx);
>  	}
>  }
> --
> 1.8.3.1


Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2020-03-17 13:10 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-16 15:34 [dpdk-stable] [PATCH] net/mlx5: reduce txq completion index memory loads Alexander Kozyrev
2020-03-17 13:10 ` Raslan Darawsheh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).