DPDK patches and discussions
 help / color / mirror / Atom feed
From: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
To: dev@dpdk.org
Cc: Adrien Mazarguil <adrien.mazarguil@6wind.com>, stable@dpdk.org
Subject: [dpdk-dev] [PATCH 2/3] net/mlx5: fix Tx WQE corruption caused by starvation
Date: Thu,  2 Feb 2017 11:34:12 +0100	[thread overview]
Message-ID: <d7ac17e728f9967237c10423a2f89528042f88dd.1486031307.git.nelio.laranjeiro@6wind.com> (raw)
In-Reply-To: <fb871683b18cb5a8db2145ea71f0d9a41d6f8683.1486031307.git.nelio.laranjeiro@6wind.com>
In-Reply-To: <fb871683b18cb5a8db2145ea71f0d9a41d6f8683.1486031307.git.nelio.laranjeiro@6wind.com>

Fixes an issue which may occurs with the inline feature activated and a
packet greater than the max_inline requested.

In such situation, more work request elements can be consumed and in the
worst case override some still handled by the NIC, this can result in
sending garbage on the network or putting the work queue in error.

Fixes: 2a66cf378954 ("net/mlx5: support inline send")

Cc: stable@dpdk.org
Reported-by: Elad Persiko <eladpe@mellanox.com>
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 91 +++++++++++++++++++++++++++++++++++++-------
 drivers/net/mlx5/mlx5_rxtx.h |  1 +
 2 files changed, 78 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ffca21c..a0e15ac 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -238,8 +238,9 @@ txq_complete(struct txq *txq)
 	} while (1);
 	if (unlikely(cqe == NULL))
 		return;
+	txq->wqe_pi = ntohs(cqe->wqe_counter);
 	ctrl = (volatile struct mlx5_wqe_ctrl *)
-		tx_mlx5_wqe(txq, ntohs(cqe->wqe_counter));
+		tx_mlx5_wqe(txq, txq->wqe_pi);
 	elts_tail = ctrl->ctrl3;
 	assert(elts_tail < (1 << txq->wqe_n));
 	/* Free buffers. */
@@ -365,6 +366,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	unsigned int i = 0;
 	unsigned int j = 0;
 	unsigned int max;
+	uint16_t max_wqe;
 	unsigned int comp;
 	volatile struct mlx5_wqe_v *wqe = NULL;
 	unsigned int segs_n = 0;
@@ -380,6 +382,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	max = (elts_n - (elts_head - txq->elts_tail));
 	if (max > elts_n)
 		max -= elts_n;
+	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
+	if (unlikely(!max_wqe))
+		return 0;
 	do {
 		volatile rte_v128u32_t *dseg = NULL;
 		uint32_t length;
@@ -407,6 +412,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		--segs_n;
 		if (!segs_n)
 			--pkts_n;
+		if (unlikely(--max_wqe == 0))
+			break;
 		wqe = (volatile struct mlx5_wqe_v *)
 			tx_mlx5_wqe(txq, txq->wqe_ci);
 		rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
@@ -476,10 +483,19 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			if (room > max_inline) {
 				uintptr_t addr_end = (addr + max_inline) &
 					~(RTE_CACHE_LINE_SIZE - 1);
-				uint16_t copy_b = ((addr_end - addr) > length) ?
-						  length :
-						  (addr_end - addr);
+				unsigned int copy_b =
+					RTE_MIN((addr_end - addr), length);
+				uint16_t n;
 
+				/*
+				 * One Dseg remains in the current WQE.  To
+				 * keep the computation positive, it is
+				 * removed after the bytes to Dseg conversion.
+				 */
+				n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
+				if (unlikely(max_wqe < n))
+					break;
+				max_wqe -= n;
 				rte_memcpy((void *)raw, (void *)addr, copy_b);
 				addr += copy_b;
 				length -= copy_b;
@@ -493,12 +509,18 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			 */
 			ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
 			if (length > 0) {
-				dseg = (volatile rte_v128u32_t *)
-					((uintptr_t)wqe +
-					 (ds * MLX5_WQE_DWORD_SIZE));
-				if ((uintptr_t)dseg >= end)
+				if (ds % (MLX5_WQE_SIZE /
+					  MLX5_WQE_DWORD_SIZE) == 0) {
+					if (unlikely(--max_wqe == 0))
+						break;
+					dseg = (volatile rte_v128u32_t *)
+					       tx_mlx5_wqe(txq, txq->wqe_ci +
+							   ds / 4);
+				} else {
 					dseg = (volatile rte_v128u32_t *)
-					       txq->wqes;
+						((uintptr_t)wqe +
+						 (ds * MLX5_WQE_DWORD_SIZE));
+				}
 				goto use_dseg;
 			} else if (!segs_n) {
 				goto next_pkt;
@@ -541,12 +563,12 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		 */
 		assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
 		if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) {
-			unsigned int n = (txq->wqe_ci + ((ds + 3) / 4)) &
-				((1 << txq->wqe_n) - 1);
-
+			if (unlikely(--max_wqe == 0))
+				break;
 			dseg = (volatile rte_v128u32_t *)
-			       tx_mlx5_wqe(txq, n);
-			rte_prefetch0(tx_mlx5_wqe(txq, n + 1));
+			       tx_mlx5_wqe(txq, txq->wqe_ci + ds / 4);
+			rte_prefetch0(tx_mlx5_wqe(txq,
+						  txq->wqe_ci + ds / 4 + 1));
 		} else {
 			++dseg;
 		}
@@ -711,6 +733,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	unsigned int i = 0;
 	unsigned int j = 0;
 	unsigned int max;
+	uint16_t max_wqe;
 	unsigned int comp;
 	struct mlx5_mpw mpw = {
 		.state = MLX5_MPW_STATE_CLOSED,
@@ -726,6 +749,9 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	max = (elts_n - (elts_head - txq->elts_tail));
 	if (max > elts_n)
 		max -= elts_n;
+	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
+	if (unlikely(!max_wqe))
+		return 0;
 	do {
 		struct rte_mbuf *buf = *(pkts++);
 		unsigned int elts_head_next;
@@ -759,6 +785,14 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		     (mpw.wqe->eseg.cs_flags != cs_flags)))
 			mlx5_mpw_close(txq, &mpw);
 		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
+			/*
+			 * Multi-Packet WQE consumes at most two WQE.
+			 * mlx5_mpw_new() expects to be able to use such
+			 * resources.
+			 */
+			if (unlikely(max_wqe < 2))
+				break;
+			max_wqe -= 2;
 			mlx5_mpw_new(txq, &mpw, length);
 			mpw.wqe->eseg.cs_flags = cs_flags;
 		}
@@ -914,11 +948,24 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 	unsigned int i = 0;
 	unsigned int j = 0;
 	unsigned int max;
+	uint16_t max_wqe;
 	unsigned int comp;
 	unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE;
 	struct mlx5_mpw mpw = {
 		.state = MLX5_MPW_STATE_CLOSED,
 	};
+	/*
+	 * Compute the maximum number of WQE which can be consumed by inline
+	 * code.
+	 * - 2 DSEG for:
+	 *   - 1 control segment,
+	 *   - 1 Ethernet segment,
+	 * - N Dseg from the inline request.
+	 */
+	const unsigned int wqe_inl_n =
+		((2 * MLX5_WQE_DWORD_SIZE +
+		  txq->max_inline * RTE_CACHE_LINE_SIZE) +
+		 RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
 
 	if (unlikely(!pkts_n))
 		return 0;
@@ -950,6 +997,11 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 			break;
 		max -= segs_n;
 		--pkts_n;
+		/*
+		 * Compute max_wqe in case less WQE were consumed in previous
+		 * iteration.
+		 */
+		max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
 		/* Should we enable HW CKSUM offload */
 		if (buf->ol_flags &
 		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
@@ -975,9 +1027,20 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
 			if ((segs_n != 1) ||
 			    (length > inline_room)) {
+				/*
+				 * Multi-Packet WQE consumes at most two WQE.
+				 * mlx5_mpw_new() expects to be able to use
+				 * such resources.
+				 */
+				if (unlikely(max_wqe < 2))
+					break;
+				max_wqe -= 2;
 				mlx5_mpw_new(txq, &mpw, length);
 				mpw.wqe->eseg.cs_flags = cs_flags;
 			} else {
+				if (unlikely(max_wqe < wqe_inl_n))
+					break;
+				max_wqe -= wqe_inl_n;
 				mlx5_mpw_inline_new(txq, &mpw, length);
 				mpw.wqe->eseg.cs_flags = cs_flags;
 			}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 302ca49..41a34d7 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -249,6 +249,7 @@ struct txq {
 	uint16_t elts_comp; /* Counter since last completion request. */
 	uint16_t cq_ci; /* Consumer index for completion queue. */
 	uint16_t wqe_ci; /* Consumer index for work queue. */
+	uint16_t wqe_pi; /* Producer index for work queue. */
 	uint16_t elts_n:4; /* (*elts)[] length (in log2). */
 	uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
 	uint16_t wqe_n:4; /* Number of of WQ elements (in log2). */
-- 
2.1.4

  reply	other threads:[~2017-02-02 10:35 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-02 10:34 [dpdk-dev] [PATCH 1/3] net/mlx5: fix Ethernet header re-writing Nelio Laranjeiro
2017-02-02 10:34 ` Nelio Laranjeiro [this message]
2017-02-02 10:34 ` [dpdk-dev] [PATCH 3/3] net/mlx5: fix inline WQE consumption Nelio Laranjeiro
2017-02-02 15:34 ` [dpdk-dev] [PATCH 1/3] net/mlx5: fix Ethernet header re-writing Ferruh Yigit
2017-02-02 16:38   ` Nélio Laranjeiro
2017-02-02 21:56 ` Ferruh Yigit

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d7ac17e728f9967237c10423a2f89528042f88dd.1486031307.git.nelio.laranjeiro@6wind.com \
    --to=nelio.laranjeiro@6wind.com \
    --cc=adrien.mazarguil@6wind.com \
    --cc=dev@dpdk.org \
    --cc=stable@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).