* [dpdk-dev] [PATCH] net/mlx5: reduce txq completion index memory loads
@ 2020-03-16 15:34 Alexander Kozyrev
2020-03-17 13:10 ` Raslan Darawsheh
0 siblings, 1 reply; 2+ messages in thread
From: Alexander Kozyrev @ 2020-03-16 15:34 UTC (permalink / raw)
To: dev; +Cc: rasland, matan, viacheslavo, stable
The mlx5_tx_handle_completion() function uses a non-optimal check to
decide whether the doorbell needs to be rung. Advancing a copy of the txq
consumer index and comparing this copy with the initial value causes
unnecessary memory loads and hurts performance. It is better to
use a simple boolean variable for this purpose. That eliminates
all the excessive memory operations on the txq consumer
index and restores the performance of Tx completions.
Fixes: 1fd9af0 ("net/mlx5: update Tx error handling routine")
Cc: stable@dpdk.org
Signed-off-by: Alexander Kozyrev <akozyrev@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
drivers/net/mlx5/mlx5_rxtx.c | 29 +++++++++++++----------------
1 file changed, 13 insertions(+), 16 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 5ac63da..f3bf763 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -2160,7 +2160,7 @@ enum mlx5_txcmp_code {
{
unsigned int count = MLX5_TX_COMP_MAX_CQE;
volatile struct mlx5_cqe *last_cqe = NULL;
- uint16_t ci = txq->cq_ci;
+ bool ring_doorbell = false;
int ret;
static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value");
@@ -2168,8 +2168,8 @@ enum mlx5_txcmp_code {
do {
volatile struct mlx5_cqe *cqe;
- cqe = &txq->cqes[ci & txq->cqe_m];
- ret = check_cqe(cqe, txq->cqe_s, ci);
+ cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
+ ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
if (likely(ret != MLX5_CQE_STATUS_ERR)) {
/* No new CQEs in completion queue. */
@@ -2183,7 +2183,6 @@ enum mlx5_txcmp_code {
* here, before we might perform SQ reset.
*/
rte_wmb();
- txq->cq_ci = ci;
ret = mlx5_tx_error_cqe_handle
(txq, (volatile struct mlx5_err_cqe *)cqe);
if (unlikely(ret < 0)) {
@@ -2199,16 +2198,18 @@ enum mlx5_txcmp_code {
* MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
* The send queue is supposed to be empty.
*/
- ++ci;
- txq->cq_pi = ci;
+ ring_doorbell = true;
+ ++txq->cq_ci;
+ txq->cq_pi = txq->cq_ci;
last_cqe = NULL;
continue;
}
/* Normal transmit completion. */
- MLX5_ASSERT(ci != txq->cq_pi);
- MLX5_ASSERT((txq->fcqs[ci & txq->cqe_m] >> 16) ==
+ MLX5_ASSERT(txq->cq_ci != txq->cq_pi);
+ MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) ==
cqe->wqe_counter);
- ++ci;
+ ring_doorbell = true;
+ ++txq->cq_ci;
last_cqe = cqe;
/*
* We have to restrict the amount of processed CQEs
@@ -2221,14 +2222,10 @@ enum mlx5_txcmp_code {
if (likely(--count == 0))
break;
} while (true);
- if (likely(ci != txq->cq_ci)) {
- /*
- * Update completion queue consuming index
- * and ring doorbell to notify hardware.
- */
+ if (likely(ring_doorbell)) {
+ /* Ring doorbell to notify hardware. */
rte_compiler_barrier();
- txq->cq_ci = ci;
- *txq->cq_db = rte_cpu_to_be_32(ci);
+ *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
mlx5_tx_comp_flush(txq, last_cqe, olx);
}
}
--
1.8.3.1
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [dpdk-dev] [PATCH] net/mlx5: reduce txq completion index memory loads
2020-03-16 15:34 [dpdk-dev] [PATCH] net/mlx5: reduce txq completion index memory loads Alexander Kozyrev
@ 2020-03-17 13:10 ` Raslan Darawsheh
0 siblings, 0 replies; 2+ messages in thread
From: Raslan Darawsheh @ 2020-03-17 13:10 UTC (permalink / raw)
To: Alexander Kozyrev, dev; +Cc: Matan Azrad, Slava Ovsiienko, stable
Hi,
> -----Original Message-----
> From: Alexander Kozyrev <akozyrev@mellanox.com>
> Sent: Monday, March 16, 2020 5:35 PM
> To: dev@dpdk.org
> Cc: Raslan Darawsheh <rasland@mellanox.com>; Matan Azrad
> <matan@mellanox.com>; Slava Ovsiienko <viacheslavo@mellanox.com>;
> stable@dpdk.org
> Subject: [PATCH] net/mlx5: reduce txq completion index memory loads
>
> The mlx5_tx_handle_completion() function uses a non-optimal check to
> decide whether the doorbell needs to be rung. Advancing a copy of the txq
> consumer index and comparing this copy with the initial value causes
> unnecessary memory loads and hurts performance. It is better to
> use a simple boolean variable for this purpose. That eliminates
> all the excessive memory operations on the txq consumer
> index and restores the performance of Tx completions.
>
> Fixes: 1fd9af0 ("net/mlx5: update Tx error handling routine")
> Cc: stable@dpdk.org
>
> Signed-off-by: Alexander Kozyrev <akozyrev@mellanox.com>
> Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> ---
> drivers/net/mlx5/mlx5_rxtx.c | 29 +++++++++++++----------------
> 1 file changed, 13 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index 5ac63da..f3bf763 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -2160,7 +2160,7 @@ enum mlx5_txcmp_code {
> {
> unsigned int count = MLX5_TX_COMP_MAX_CQE;
> volatile struct mlx5_cqe *last_cqe = NULL;
> - uint16_t ci = txq->cq_ci;
> + bool ring_doorbell = false;
> int ret;
>
> static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative
> value");
> @@ -2168,8 +2168,8 @@ enum mlx5_txcmp_code {
> do {
> volatile struct mlx5_cqe *cqe;
>
> - cqe = &txq->cqes[ci & txq->cqe_m];
> - ret = check_cqe(cqe, txq->cqe_s, ci);
> + cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
> + ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
> if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
> if (likely(ret != MLX5_CQE_STATUS_ERR)) {
> /* No new CQEs in completion queue. */
> @@ -2183,7 +2183,6 @@ enum mlx5_txcmp_code {
> * here, before we might perform SQ reset.
> */
> rte_wmb();
> - txq->cq_ci = ci;
> ret = mlx5_tx_error_cqe_handle
> (txq, (volatile struct mlx5_err_cqe *)cqe);
> if (unlikely(ret < 0)) {
> @@ -2199,16 +2198,18 @@ enum mlx5_txcmp_code {
> * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
> * The send queue is supposed to be empty.
> */
> - ++ci;
> - txq->cq_pi = ci;
> + ring_doorbell = true;
> + ++txq->cq_ci;
> + txq->cq_pi = txq->cq_ci;
> last_cqe = NULL;
> continue;
> }
> /* Normal transmit completion. */
> - MLX5_ASSERT(ci != txq->cq_pi);
> - MLX5_ASSERT((txq->fcqs[ci & txq->cqe_m] >> 16) ==
> + MLX5_ASSERT(txq->cq_ci != txq->cq_pi);
> + MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16)
> ==
> cqe->wqe_counter);
> - ++ci;
> + ring_doorbell = true;
> + ++txq->cq_ci;
> last_cqe = cqe;
> /*
> * We have to restrict the amount of processed CQEs
> @@ -2221,14 +2222,10 @@ enum mlx5_txcmp_code {
> if (likely(--count == 0))
> break;
> } while (true);
> - if (likely(ci != txq->cq_ci)) {
> - /*
> - * Update completion queue consuming index
> - * and ring doorbell to notify hardware.
> - */
> + if (likely(ring_doorbell)) {
> + /* Ring doorbell to notify hardware. */
> rte_compiler_barrier();
> - txq->cq_ci = ci;
> - *txq->cq_db = rte_cpu_to_be_32(ci);
> + *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
> mlx5_tx_comp_flush(txq, last_cqe, olx);
> }
> }
> --
> 1.8.3.1
Patch applied to next-net-mlx,
Kindest regards,
Raslan Darawsheh
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2020-03-17 13:10 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-16 15:34 [dpdk-dev] [PATCH] net/mlx5: reduce txq completion index memory loads Alexander Kozyrev
2020-03-17 13:10 ` Raslan Darawsheh
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).