DPDK patches and discussions
* [dpdk-dev] [PATCH] net/mlx5: fix completion queue drain loop
@ 2019-08-04 13:57 Viacheslav Ovsiienko
  2019-08-05  6:42 ` Matan Azrad
  2019-09-02  2:38 ` Gavin Hu (Arm Technology China)
  0 siblings, 2 replies; 3+ messages in thread
From: Viacheslav Ovsiienko @ 2019-08-04 13:57 UTC (permalink / raw)
  To: dev; +Cc: yskoh, shahafs

The completion loop is optimized for speed in the error-free
case: no CQE field is fetched on each loop iteration. Code
size is also optimized - the flush buffers routine is now
invoked only once, after the loop.
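
In outline, the reworked completion loop and the single flush call look
like this (a simplified sketch: the error-recovery path is elided and the
existing check_cqe() helper is assumed):

	unsigned int count = MLX5_TX_COMP_MAX_CQE;
	volatile struct mlx5_cqe *last_cqe = NULL;
	int ret;

	do {
		volatile struct mlx5_cqe *cqe;

		cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
		ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN))
			break;	/* HW-owned CQE or the error path (elided here) */
		/* Normal completion: no other CQE field is fetched in the loop. */
		++txq->cq_ci;
		last_cqe = cqe;
	} while (--count);
	/* Single flush: ring the CQ doorbell and free completed mbufs once. */
	mlx5_tx_comp_flush(txq, last_cqe, ret, olx);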

Fixes: 318ea4cfa1b1 ("net/mlx5: fix Tx completion descriptors fetching loop")

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 98 +++++++++++++++++++++++++++++---------------
 drivers/net/mlx5/mlx5_rxtx.h |  8 ++--
 2 files changed, 68 insertions(+), 38 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 1ec3793..a890f41 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -654,9 +654,10 @@ enum mlx5_txcmp_code {
  *   Pointer to the error CQE.
  *
  * @return
- *   The last Tx buffer element to free.
+ *   Negative value if queue recovery failed,
+ *   the last Tx buffer element to free otherwise.
  */
-uint16_t
+int
 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq,
 			 volatile struct mlx5_err_cqe *err_cqe)
 {
@@ -706,6 +707,7 @@ enum mlx5_txcmp_code {
 			return txq->elts_head;
 		}
 		/* Recovering failed - try again later on the same WQE. */
+		return -1;
 	} else {
 		txq->cq_ci++;
 	}
@@ -2010,6 +2012,45 @@ enum mlx5_txcmp_code {
 }
 
 /**
+ * Update completion queue consuming index via doorbell
+ * and flush the completed data buffers.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param last_cqe
+ *   Pointer to the last valid CQE; if not NULL, update txq->wqe_pi and flush the buffers.
+ * @param itail
+ *   If not negative, flush the buffers up to this index.
+ * @param olx
+ *   Configured Tx offloads mask. It is fully defined at
+ *   compile time and may be used for optimization.
+ */
+static __rte_always_inline void
+mlx5_tx_comp_flush(struct mlx5_txq_data *restrict txq,
+		   volatile struct mlx5_cqe *last_cqe,
+		   int itail,
+		   unsigned int olx __rte_unused)
+{
+	uint16_t tail;
+
+	if (likely(last_cqe != NULL)) {
+		txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
+		tail = ((volatile struct mlx5_wqe_cseg *)
+			(txq->wqes + (txq->wqe_pi & txq->wqe_m)))->misc;
+	} else if (itail >= 0) {
+		tail = (uint16_t)itail;
+	} else {
+		return;
+	}
+	rte_compiler_barrier();
+	*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
+	if (likely(tail != txq->elts_tail)) {
+		mlx5_tx_free_elts(txq, tail, olx);
+		assert(tail == txq->elts_tail);
+	}
+}
+
+/**
  * Manage TX completions. This routine checks the CQ for
  * arrived CQEs, deduces the last accomplished WQE in SQ,
  * updates SQ producing index and frees all completed mbufs.
@@ -2028,10 +2069,11 @@ enum mlx5_txcmp_code {
 			  unsigned int olx __rte_unused)
 {
 	unsigned int count = MLX5_TX_COMP_MAX_CQE;
-	bool update = false;
-	uint16_t tail = txq->elts_tail;
+	volatile struct mlx5_cqe *last_cqe = NULL;
 	int ret;
 
+	static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value");
+	static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value");
 	do {
 		volatile struct mlx5_cqe *cqe;
 
@@ -2043,32 +2085,30 @@ enum mlx5_txcmp_code {
 				assert(ret == MLX5_CQE_STATUS_HW_OWN);
 				break;
 			}
-			/* Some error occurred, try to restart. */
+			/*
+			 * Some error occurred, try to restart.
+			 * There is no barrier after the WQE related doorbell
+			 * is written; make sure all writes are completed
+			 * here before we might perform an SQ reset.
+			 */
 			rte_wmb();
-			tail = mlx5_tx_error_cqe_handle
+			ret = mlx5_tx_error_cqe_handle
 				(txq, (volatile struct mlx5_err_cqe *)cqe);
-			if (likely(tail != txq->elts_tail)) {
-				mlx5_tx_free_elts(txq, tail, olx);
-				assert(tail == txq->elts_tail);
-			}
-			/* Allow flushing all CQEs from the queue. */
-			count = txq->cqe_s;
-		} else {
-			volatile struct mlx5_wqe_cseg *cseg;
-
-			/* Normal transmit completion. */
-			++txq->cq_ci;
-			rte_cio_rmb();
-			txq->wqe_pi = rte_be_to_cpu_16(cqe->wqe_counter);
-			cseg = (volatile struct mlx5_wqe_cseg *)
-				(txq->wqes + (txq->wqe_pi & txq->wqe_m));
-			tail = cseg->misc;
+			/*
+			 * Flush buffers, update consuming index
+			 * if recovery succeeded. Otherwise
+			 * just try to recover later.
+			 */
+			last_cqe = NULL;
+			break;
 		}
+		/* Normal transmit completion. */
+		++txq->cq_ci;
+		last_cqe = cqe;
 #ifndef NDEBUG
 		if (txq->cq_pi)
 			--txq->cq_pi;
 #endif
-		update = true;
 	/*
 	 * We have to restrict the amount of processed CQEs
 	 * in one tx_burst routine call. The CQ may be large
@@ -2078,17 +2118,7 @@ enum mlx5_txcmp_code {
 	 * latency.
 	 */
 	} while (--count);
-	if (likely(tail != txq->elts_tail)) {
-		/* Free data buffers from elts. */
-		mlx5_tx_free_elts(txq, tail, olx);
-		assert(tail == txq->elts_tail);
-	}
-	if (likely(update)) {
-		/* Update the consumer index. */
-		rte_compiler_barrier();
-		*txq->cq_db =
-		rte_cpu_to_be_32(txq->cq_ci);
-	}
+	mlx5_tx_comp_flush(txq, last_cqe, ret, olx);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index c209d99..aaa02a2 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -400,7 +400,7 @@ struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx,
 void mlx5_set_ptype_table(void);
 void mlx5_set_cksum_table(void);
 void mlx5_set_swp_types_table(void);
-__rte_noinline uint16_t mlx5_tx_error_cqe_handle
+__rte_noinline int mlx5_tx_error_cqe_handle
 				(struct mlx5_txq_data *restrict txq,
 				 volatile struct mlx5_err_cqe *err_cqe);
 uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
@@ -499,9 +499,9 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova,
 
 /* CQE status. */
 enum mlx5_cqe_status {
-	MLX5_CQE_STATUS_SW_OWN,
-	MLX5_CQE_STATUS_HW_OWN,
-	MLX5_CQE_STATUS_ERR,
+	MLX5_CQE_STATUS_SW_OWN = -1,
+	MLX5_CQE_STATUS_HW_OWN = -2,
+	MLX5_CQE_STATUS_ERR = -3,
 };
 
 /**
-- 
1.8.3.1
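
Read together with the mlx5_rxtx.h hunk, the reason the CQE status codes
become negative is that the same signed value - ret in the completion
handling routine, itail in mlx5_tx_comp_flush() - now carries either a
status from the CQE check (always negative) or a value returned by
mlx5_tx_error_cqe_handle() (a tail index >= 0, or -1 when recovery has to
be retried), so the flush routine can separate the cases with plain sign
tests, as in the hunk above:

	if (likely(last_cqe != NULL)) {
		/* Take the tail from the WQE addressed by the last valid CQE. */
		txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
		tail = ((volatile struct mlx5_wqe_cseg *)
			(txq->wqes + (txq->wqe_pi & txq->wqe_m)))->misc;
	} else if (itail >= 0) {
		tail = (uint16_t)itail;	/* tail supplied by the error handler */
	} else {
		return;			/* negative status - nothing to flush yet */
	}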


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [dpdk-dev] [PATCH] net/mlx5: fix completion queue drain loop
  2019-08-04 13:57 [dpdk-dev] [PATCH] net/mlx5: fix completion queue drain loop Viacheslav Ovsiienko
@ 2019-08-05  6:42 ` Matan Azrad
  2019-09-02  2:38 ` Gavin Hu (Arm Technology China)
  1 sibling, 0 replies; 3+ messages in thread
From: Matan Azrad @ 2019-08-05  6:42 UTC (permalink / raw)
  To: Slava Ovsiienko, dev; +Cc: Yongseok Koh, Shahaf Shuler



From: Viacheslav Ovsiienko
> The completion loop is optimized for speed in the error-free case: no CQE
> field is fetched on each loop iteration. Code size is also optimized - the
> flush buffers routine is now invoked only once, after the loop.
> 
> Fixes: 318ea4cfa1b1 ("net/mlx5: fix Tx completion descriptors fetching loop")
> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [dpdk-dev] [PATCH] net/mlx5: fix completion queue drain loop
  2019-08-04 13:57 [dpdk-dev] [PATCH] net/mlx5: fix completion queue drain loop Viacheslav Ovsiienko
  2019-08-05  6:42 ` Matan Azrad
@ 2019-09-02  2:38 ` Gavin Hu (Arm Technology China)
  1 sibling, 0 replies; 3+ messages in thread
From: Gavin Hu (Arm Technology China) @ 2019-09-02  2:38 UTC (permalink / raw)
  To: Viacheslav Ovsiienko, dev
  Cc: yskoh, shahafs, Gavin Hu (Arm Technology China),
	Honnappa Nagarahalli, Steve Capper

Hi Viacheslav,

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Viacheslav Ovsiienko
> Sent: Sunday, August 4, 2019 9:57 PM
> To: dev@dpdk.org
> Cc: yskoh@mellanox.com; shahafs@mellanox.com
> Subject: [dpdk-dev] [PATCH] net/mlx5: fix completion queue drain loop
>
> The completion loop is optimized for speed in the error-free
> case: no CQE field is fetched on each loop iteration. Code
> size is also optimized - the flush buffers routine is now
> invoked only once, after the loop.
>
> Fixes: 318ea4cfa1b1 ("net/mlx5: fix Tx completion descriptors fetching loop")
>
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5_rxtx.c | 98 +++++++++++++++++++++++++++++---------------
>  drivers/net/mlx5/mlx5_rxtx.h |  8 ++--
>  2 files changed, 68 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index 1ec3793..a890f41 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -654,9 +654,10 @@ enum mlx5_txcmp_code {
>   *   Pointer to the error CQE.
>   *
>   * @return
> - *   The last Tx buffer element to free.
> + *   Negative value if queue recovery failed,
> + *   the last Tx buffer element to free otherwise.
>   */
> -uint16_t
> +int
>  mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq,
>                        volatile struct mlx5_err_cqe *err_cqe)
>  {
> @@ -706,6 +707,7 @@ enum mlx5_txcmp_code {
>                       return txq->elts_head;
>               }
>               /* Recovering failed - try again later on the same WQE. */
> +             return -1;
>       } else {
>               txq->cq_ci++;
>       }
> @@ -2010,6 +2012,45 @@ enum mlx5_txcmp_code {
>  }
>
>  /**
> + * Update completion queue consuming index via doorbell
> + * and flush the completed data buffers.
> + *
> + * @param txq
> + *   Pointer to TX queue structure.
> + * @param last_cqe
> + *   Pointer to the last valid CQE; if not NULL, update txq->wqe_pi and flush the buffers.
> + * @param itail
> + *   If not negative, flush the buffers up to this index.
> + * @param olx
> + *   Configured Tx offloads mask. It is fully defined at
> + *   compile time and may be used for optimization.
> + */
> +static __rte_always_inline void
> +mlx5_tx_comp_flush(struct mlx5_txq_data *restrict txq,
> +                volatile struct mlx5_cqe *last_cqe,
> +                int itail,
> +                unsigned int olx __rte_unused)
> +{
> +     uint16_t tail;
> +
> +     if (likely(last_cqe != NULL)) {
> +             txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
> +             tail = ((volatile struct mlx5_wqe_cseg *)
> +                     (txq->wqes + (txq->wqe_pi & txq->wqe_m)))->misc;
> +     } else if (itail >= 0) {
> +             tail = (uint16_t)itail;
> +     } else {
> +             return;
> +     }
> +     rte_compiler_barrier();
The rte_compiler_barrier is sufficient for strong memory ordering models like x86, but insufficient for weak memory ordering models such as PPC/aarch64.
It should be rte_cio_wmb here, to force the memory writes above (++txq->cq_ci?) to complete before letting the HW know about them.
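
If that suggestion were taken, the doorbell path in mlx5_tx_comp_flush()
would look roughly as below - a sketch only, using the rte_cio_wmb()
barrier suggested above; the final barrier choice is up to the author:

	/*
	 * Coherent-I/O write barrier instead of a compiler-only barrier,
	 * so the preceding writes complete before the device can observe
	 * the new CQ consumer index on weakly ordered CPUs as well.
	 */
	rte_cio_wmb();				/* was: rte_compiler_barrier() */
	*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);	/* CQ doorbell record */
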
> +     *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
> +     if (likely(tail != txq->elts_tail)) {
> +             mlx5_tx_free_elts(txq, tail, olx);
> +             assert(tail == txq->elts_tail);
> +     }
> +}
> +
> +/**
>   * Manage TX completions. This routine checks the CQ for
>   * arrived CQEs, deduces the last accomplished WQE in SQ,
>   * updates SQ producing index and frees all completed mbufs.
> @@ -2028,10 +2069,11 @@ enum mlx5_txcmp_code {
>                         unsigned int olx __rte_unused)
>  {
>       unsigned int count = MLX5_TX_COMP_MAX_CQE;
> -     bool update = false;
> -     uint16_t tail = txq->elts_tail;
> +     volatile struct mlx5_cqe *last_cqe = NULL;
>       int ret;
>
> +     static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value");
> +     static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value");
>       do {
>               volatile struct mlx5_cqe *cqe;
>
> @@ -2043,32 +2085,30 @@ enum mlx5_txcmp_code {
>                               assert(ret == MLX5_CQE_STATUS_HW_OWN);
>                               break;
>                       }
> -                     /* Some error occurred, try to restart. */
> +                     /*
> +                      * Some error occurred, try to restart.
> +                      * There is no barrier after the WQE related doorbell
> +                      * is written; make sure all writes are completed
> +                      * here before we might perform an SQ reset.
> +                      */
>                       rte_wmb();
> -                     tail = mlx5_tx_error_cqe_handle
> +                     ret = mlx5_tx_error_cqe_handle
>                               (txq, (volatile struct mlx5_err_cqe *)cqe);
> -                     if (likely(tail != txq->elts_tail)) {
> -                             mlx5_tx_free_elts(txq, tail, olx);
> -                             assert(tail == txq->elts_tail);
> -                     }
> -                     /* Allow flushing all CQEs from the queue. */
> -                     count = txq->cqe_s;
> -             } else {
> -                     volatile struct mlx5_wqe_cseg *cseg;
> -
> -                     /* Normal transmit completion. */
> -                     ++txq->cq_ci;
> -                     rte_cio_rmb();
> -                     txq->wqe_pi = rte_be_to_cpu_16(cqe->wqe_counter);
> -                     cseg = (volatile struct mlx5_wqe_cseg *)
> -                             (txq->wqes + (txq->wqe_pi & txq->wqe_m));
> -                     tail = cseg->misc;
> +                     /*
> +                      * Flush buffers, update consuming index
> +                      * if recovery succeeded. Otherwise
> +                      * just try to recover later.
> +                      */
> +                     last_cqe = NULL;
> +                     break;
>               }
> +             /* Normal transmit completion. */
> +             ++txq->cq_ci;
> +             last_cqe = cqe;
>  #ifndef NDEBUG
>               if (txq->cq_pi)
>                       --txq->cq_pi;
>  #endif
> -             update = true;
>       /*
>        * We have to restrict the amount of processed CQEs
>        * in one tx_burst routine call. The CQ may be large
> @@ -2078,17 +2118,7 @@ enum mlx5_txcmp_code {
>        * latency.
>        */
>       } while (--count);
> -     if (likely(tail != txq->elts_tail)) {
> -             /* Free data buffers from elts. */
> -             mlx5_tx_free_elts(txq, tail, olx);
> -             assert(tail == txq->elts_tail);
> -     }
> -     if (likely(update)) {
> -             /* Update the consumer index. */
> -             rte_compiler_barrier();
> -             *txq->cq_db =
> -             rte_cpu_to_be_32(txq->cq_ci);
> -     }
> +     mlx5_tx_comp_flush(txq, last_cqe, ret, olx);
>  }
>
>  /**
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index c209d99..aaa02a2 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -400,7 +400,7 @@ struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx,
>  void mlx5_set_ptype_table(void);
>  void mlx5_set_cksum_table(void);
>  void mlx5_set_swp_types_table(void);
> -__rte_noinline uint16_t mlx5_tx_error_cqe_handle
> +__rte_noinline int mlx5_tx_error_cqe_handle
>                               (struct mlx5_txq_data *restrict txq,
>                                volatile struct mlx5_err_cqe *err_cqe);
>  uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
> @@ -499,9 +499,9 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova,
>
>  /* CQE status. */
>  enum mlx5_cqe_status {
> -     MLX5_CQE_STATUS_SW_OWN,
> -     MLX5_CQE_STATUS_HW_OWN,
> -     MLX5_CQE_STATUS_ERR,
> +     MLX5_CQE_STATUS_SW_OWN = -1,
> +     MLX5_CQE_STATUS_HW_OWN = -2,
> +     MLX5_CQE_STATUS_ERR = -3,
>  };
>
>  /**
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-09-02  2:38 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-04 13:57 [dpdk-dev] [PATCH] net/mlx5: fix completion queue drain loop Viacheslav Ovsiienko
2019-08-05  6:42 ` Matan Azrad
2019-09-02  2:38 ` Gavin Hu (Arm Technology China)
