From: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
To: dev@dpdk.org
Cc: yskoh@mellanox.com, matan@mellanox.com
Subject: [dpdk-dev] [PATCH v2 3/6] net/mlx5: fix completion queue drain loop
Date: Mon, 5 Aug 2019 13:03:51 +0000
Message-ID: <1565010234-21769-4-git-send-email-viacheslavo@mellanox.com>
In-Reply-To: <1565010234-21769-1-git-send-email-viacheslavo@mellanox.com>
The completion loop is optimized for speed in the error-free
case: CQE fields are no longer fetched on each loop iteration.
Code size is also reduced: the flush buffers routine is now
invoked only once, after the loop completes.
Fixes: 318ea4cfa1b1 ("net/mlx5: fix Tx completion descriptors fetching loop")
Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
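Note for reviewers: below is a minimal sketch of the reworked drain
loop, simplified from the diff that follows. The error-recovery branch
and the NDEBUG counter are elided, and the txq fields and the
check_cqe() helper are assumed to be the ones the mlx5 driver already
provides. Per iteration the loop only advances cq_ci and records the
last software-owned CQE; the WQE counter fetch, the doorbell write and
the mbuf free all happen exactly once after the loop, in the new
mlx5_tx_comp_flush() helper. A note on the negative CQE status values
follows the diff.

    unsigned int count = MLX5_TX_COMP_MAX_CQE;
    volatile struct mlx5_cqe *last_cqe = NULL;
    int ret = MLX5_CQE_STATUS_HW_OWN;

    do {
            volatile struct mlx5_cqe *cqe;

            cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
            ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
            if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN))
                    break; /* HW-owned or error CQE, stop polling. */
            ++txq->cq_ci;
            last_cqe = cqe; /* No CQE field fetch inside the loop. */
    } while (--count);
    /* Single doorbell update and single buffer flush. */
    mlx5_tx_comp_flush(txq, last_cqe, ret, olx);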
drivers/net/mlx5/mlx5_rxtx.c | 98 +++++++++++++++++++++++++++++---------------
drivers/net/mlx5/mlx5_rxtx.h | 8 ++--
2 files changed, 68 insertions(+), 38 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 1ec3793..a890f41 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -654,9 +654,10 @@ enum mlx5_txcmp_code {
* Pointer to the error CQE.
*
* @return
- * The last Tx buffer element to free.
+ * Negative value if queue recovery failed,
+ * the last Tx buffer element to free otherwise.
*/
-uint16_t
+int
mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq,
volatile struct mlx5_err_cqe *err_cqe)
{
@@ -706,6 +707,7 @@ enum mlx5_txcmp_code {
return txq->elts_head;
}
/* Recovering failed - try again later on the same WQE. */
+ return -1;
} else {
txq->cq_ci++;
}
@@ -2010,6 +2012,45 @@ enum mlx5_txcmp_code {
}
/**
+ * Update completion queue consuming index via doorbell
+ * and flush the completed data buffers.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param last_cqe
+ *   Valid CQE pointer; if not NULL, update txq->wqe_pi and flush the buffers.
+ * @param itail
+ *   If non-negative, flush the buffers up to this index.
+ * @param olx
+ * Configured Tx offloads mask. It is fully defined at
+ * compile time and may be used for optimization.
+ */
+static __rte_always_inline void
+mlx5_tx_comp_flush(struct mlx5_txq_data *restrict txq,
+ volatile struct mlx5_cqe *last_cqe,
+ int itail,
+ unsigned int olx __rte_unused)
+{
+ uint16_t tail;
+
+ if (likely(last_cqe != NULL)) {
+ txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
+ tail = ((volatile struct mlx5_wqe_cseg *)
+ (txq->wqes + (txq->wqe_pi & txq->wqe_m)))->misc;
+ } else if (itail >= 0) {
+ tail = (uint16_t)itail;
+ } else {
+ return;
+ }
+ rte_compiler_barrier();
+ *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
+ if (likely(tail != txq->elts_tail)) {
+ mlx5_tx_free_elts(txq, tail, olx);
+ assert(tail == txq->elts_tail);
+ }
+}
+
+/**
* Manage TX completions. This routine checks the CQ for
* arrived CQEs, deduces the last accomplished WQE in SQ,
* updates SQ producing index and frees all completed mbufs.
@@ -2028,10 +2069,11 @@ enum mlx5_txcmp_code {
unsigned int olx __rte_unused)
{
unsigned int count = MLX5_TX_COMP_MAX_CQE;
- bool update = false;
- uint16_t tail = txq->elts_tail;
+ volatile struct mlx5_cqe *last_cqe = NULL;
int ret;
+ static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be a negative value");
+ static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be a negative value");
do {
volatile struct mlx5_cqe *cqe;
@@ -2043,32 +2085,30 @@ enum mlx5_txcmp_code {
assert(ret == MLX5_CQE_STATUS_HW_OWN);
break;
}
- /* Some error occurred, try to restart. */
+ /*
+ * Some error occurred, try to restart.
+ * There is no barrier after the WQE-related
+ * doorbell write; make sure all writes are
+ * completed here before we might reset the SQ.
+ */
rte_wmb();
- tail = mlx5_tx_error_cqe_handle
+ ret = mlx5_tx_error_cqe_handle
(txq, (volatile struct mlx5_err_cqe *)cqe);
- if (likely(tail != txq->elts_tail)) {
- mlx5_tx_free_elts(txq, tail, olx);
- assert(tail == txq->elts_tail);
- }
- /* Allow flushing all CQEs from the queue. */
- count = txq->cqe_s;
- } else {
- volatile struct mlx5_wqe_cseg *cseg;
-
- /* Normal transmit completion. */
- ++txq->cq_ci;
- rte_cio_rmb();
- txq->wqe_pi = rte_be_to_cpu_16(cqe->wqe_counter);
- cseg = (volatile struct mlx5_wqe_cseg *)
- (txq->wqes + (txq->wqe_pi & txq->wqe_m));
- tail = cseg->misc;
+ /*
+ * Flush the buffers and update the
+ * consuming index if recovery succeeded;
+ * otherwise just try to recover later.
+ */
+ last_cqe = NULL;
+ break;
}
+ /* Normal transmit completion. */
+ ++txq->cq_ci;
+ last_cqe = cqe;
#ifndef NDEBUG
if (txq->cq_pi)
--txq->cq_pi;
#endif
- update = true;
/*
* We have to restrict the amount of processed CQEs
* in one tx_burst routine call. The CQ may be large
@@ -2078,17 +2118,7 @@ enum mlx5_txcmp_code {
* latency.
*/
} while (--count);
- if (likely(tail != txq->elts_tail)) {
- /* Free data buffers from elts. */
- mlx5_tx_free_elts(txq, tail, olx);
- assert(tail == txq->elts_tail);
- }
- if (likely(update)) {
- /* Update the consumer index. */
- rte_compiler_barrier();
- *txq->cq_db =
- rte_cpu_to_be_32(txq->cq_ci);
- }
+ mlx5_tx_comp_flush(txq, last_cqe, ret, olx);
}
/**
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index c209d99..aaa02a2 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -400,7 +400,7 @@ struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx,
void mlx5_set_ptype_table(void);
void mlx5_set_cksum_table(void);
void mlx5_set_swp_types_table(void);
-__rte_noinline uint16_t mlx5_tx_error_cqe_handle
+__rte_noinline int mlx5_tx_error_cqe_handle
(struct mlx5_txq_data *restrict txq,
volatile struct mlx5_err_cqe *err_cqe);
uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
@@ -499,9 +499,9 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova,
/* CQE status. */
enum mlx5_cqe_status {
- MLX5_CQE_STATUS_SW_OWN,
- MLX5_CQE_STATUS_HW_OWN,
- MLX5_CQE_STATUS_ERR,
+ MLX5_CQE_STATUS_SW_OWN = -1,
+ MLX5_CQE_STATUS_HW_OWN = -2,
+ MLX5_CQE_STATUS_ERR = -3,
};
/**
--
1.8.3.1
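A note on the mlx5_cqe_status change above: the status codes are made
explicitly negative so that a single int can carry either a status or
a tail index. mlx5_tx_error_cqe_handle() now returns the non-negative
elts index on successful recovery and -1 on failure, and the drain
loop passes its last status straight into mlx5_tx_comp_flush() as
itail, where one sign check tells the cases apart. A sketch of the
values that can reach the itail parameter, under that assumption:

    /*
     * itail < 0  - nothing to flush by index:
     *   MLX5_CQE_STATUS_HW_OWN (-2) - polling stopped, queue drained
     *   MLX5_CQE_STATUS_SW_OWN (-1) - loop exhausted its CQE budget
     *   -1 from mlx5_tx_error_cqe_handle() - recovery failed
     * itail >= 0 - tail index after successful error recovery.
     */
    static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be a negative value");
    static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be a negative value");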
Thread overview: 8+ messages
2019-08-05 13:03 [dpdk-dev] [PATCH v2 0/6] fix transmit datapath cumulative series Viacheslav Ovsiienko
2019-08-05 13:03 ` [dpdk-dev] [PATCH v2 1/6] net/mlx5: fix default minimal data inline Viacheslav Ovsiienko
2019-08-05 14:41 ` Raslan Darawsheh
2019-08-05 13:03 ` [dpdk-dev] [PATCH v2 2/6] net/mlx5: fix inline data len assert condition Viacheslav Ovsiienko
2019-08-05 13:03 ` Viacheslav Ovsiienko [this message]
2019-08-05 13:03 ` [dpdk-dev] [PATCH v2 4/6] net/mlx5: fix inline data settings Viacheslav Ovsiienko
2019-08-05 13:03 ` [dpdk-dev] [PATCH v2 5/6] net/mlx5: fix packet size inline settings Viacheslav Ovsiienko
2019-08-05 13:03 ` [dpdk-dev] [PATCH v2 6/6] net/mlx5: fix completion queue overflow for large bursts Viacheslav Ovsiienko