From: Yongseok Koh <yskoh@mellanox.com>
To: <ferruh.yigit@intel.com>
Cc: <dev@dpdk.org>, <adrien.mazarguil@6wind.com>,
	<nelio.laranjeiro@6wind.com>, <hhaim@cisco.com>,
	Yongseok Koh <yskoh@mellanox.com>
Subject: [dpdk-dev] [PATCH] net/mlx5: fix erroneous index handling for Tx ring
Date: Fri, 5 May 2017 18:20:18 -0700	[thread overview]
Message-ID: <20170506012018.18579-1-yskoh@mellanox.com> (raw)

In case of a resource deficiency on Tx, mlx5_tx_burst() breaks out of its
loop without rolling back the resources it has already consumed
(txq->wqes[] and txq->elts[]). This can crash the application because
mbufs that were never actually posted can be freed while completions are
being processed. While fixing this, some error-prone/redundant indexing
has been cleaned up as well.
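
For illustration, a minimal standalone sketch of the pattern this fix
adopts (hypothetical ring type and sizes, not the driver code): per-burst
progress is kept in local counters and the ring head is published once,
after the loop, so breaking out on a resource deficiency never exposes
half-posted entries:

    #include <stdint.h>

    #define RING_SIZE 256 /* hypothetical power-of-two ring size */

    struct ring {
            void *elts[RING_SIZE];
            uint16_t head; /* producer index, seen by completion path */
            uint16_t tail; /* consumer index, advanced on completion */
    };

    /* Post up to n objects; returns how many were actually posted. */
    static uint16_t
    ring_burst(struct ring *r, void **objs, uint16_t n)
    {
            uint16_t i;

            for (i = 0; i < n; ++i) {
                    /*
                     * Derive the slot from the unpublished head plus
                     * local progress; r->head itself is never touched
                     * inside the loop.
                     */
                    uint16_t slot = (r->head + i) & (RING_SIZE - 1);

                    if (((slot + 1) & (RING_SIZE - 1)) == r->tail)
                            break; /* full: stop, nothing to roll back */
                    r->elts[slot] = objs[i];
            }
            /* Publish all successfully posted entries in one step. */
            r->head = (r->head + i) & (RING_SIZE - 1);
            return i;
    }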

Reported-by: Hanoch Haim <hhaim@cisco.com>
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 50 +++++++++++++++++++-------------------------
 1 file changed, 21 insertions(+), 29 deletions(-)
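
As a reviewer-facing aside, a hedged, self-contained toy (simplified WQE
struct and a hypothetical try_post() helper, not the mlx5 code) showing
why a separate last_wqe pointer is now kept: the loop can break after wqe
has been advanced past the last packet that fully posted, so the
completion request and the doorbell must target the last WQE that
actually succeeded:

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified WQE stand-in: only the control words the patch uses. */
    struct toy_wqe {
            uint32_t ctrl2; /* completion-request flag */
            uint32_t ctrl3; /* "immediate" field reused for elts_head */
    };

    /* Hypothetical posting helper: succeeds while resources remain. */
    static int
    try_post(struct toy_wqe *w, int *budget)
    {
            (void)w;
            return (*budget)-- > 0;
    }

    int
    main(void)
    {
            struct toy_wqe ring[4] = {{0, 0}};
            struct toy_wqe *last_wqe = NULL; /* last successful post */
            int budget = 2; /* pretend resources run out after 2 posts */
            int i;

            for (i = 0; i < 4; ++i) {
                    struct toy_wqe *wqe = &ring[i];

                    if (!try_post(wqe, &budget))
                            break; /* wqe points past the last success */
                    last_wqe = wqe; /* remember only completed posts */
            }
            /* Request completion on the last successful WQE, not wqe. */
            if (last_wqe != NULL) {
                    last_wqe->ctrl2 = 8;
                    printf("doorbell on WQE %d of %d posted\n",
                           (int)(last_wqe - ring), i);
            }
            return 0;
    }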

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 6254228a9..d7176a422 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -493,7 +493,6 @@ uint16_t
 mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
 	struct txq *txq = (struct txq *)dpdk_txq;
-	uint16_t elts_head = txq->elts_head;
 	const unsigned int elts_n = 1 << txq->elts_n;
 	unsigned int i = 0;
 	unsigned int j = 0;
@@ -504,6 +503,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	uint16_t max_wqe;
 	unsigned int comp;
 	volatile struct mlx5_wqe_v *wqe = NULL;
+	volatile struct mlx5_wqe_ctrl *last_wqe = NULL;
 	unsigned int segs_n = 0;
 	struct rte_mbuf *buf = NULL;
 	uint8_t *raw;
@@ -514,7 +514,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	rte_prefetch0(*pkts);
 	/* Start processing. */
 	txq_complete(txq);
-	max = (elts_n - (elts_head - txq->elts_tail));
+	max = (elts_n - (txq->elts_head - txq->elts_tail));
 	if (max > elts_n)
 		max -= elts_n;
 	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
@@ -524,8 +524,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		volatile rte_v128u32_t *dseg = NULL;
 		uint32_t length;
 		unsigned int ds = 0;
+		unsigned int sg = 0;
 		uintptr_t addr;
 		uint64_t naddr;
+		uint16_t elts_head = (txq->elts_head + i + j) & (elts_n - 1);
 		uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2;
 		uint16_t tso_header_sz = 0;
 		uint16_t ehdr;
@@ -536,7 +538,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 #endif
 
 		/* first_seg */
-		buf = *(pkts++);
+		buf = *(pkts + i);
 		segs_n = buf->nb_segs;
 		/*
 		 * Make sure there is enough room to store this packet and
@@ -547,15 +549,13 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			break;
 		max -= segs_n;
 		--segs_n;
-		if (!segs_n)
-			--pkts_n;
 		if (unlikely(--max_wqe == 0))
 			break;
 		wqe = (volatile struct mlx5_wqe_v *)
 			tx_mlx5_wqe(txq, txq->wqe_ci);
 		rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
-		if (pkts_n > 1)
-			rte_prefetch0(*pkts);
+		if (pkts_n - i > 1)
+			rte_prefetch0(*(pkts + i + 1));
 		addr = rte_pktmbuf_mtod(buf, uintptr_t);
 		length = DATA_LEN(buf);
 		ehdr = (((uint8_t *)addr)[1] << 8) |
@@ -567,14 +567,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			break;
 		/* Update element. */
 		(*txq->elts)[elts_head] = buf;
-		elts_head = (elts_head + 1) & (elts_n - 1);
 		/* Prefetch next buffer data. */
-		if (pkts_n > 1) {
-			volatile void *pkt_addr;
-
-			pkt_addr = rte_pktmbuf_mtod(*pkts, volatile void *);
-			rte_prefetch0(pkt_addr);
-		}
+		if (pkts_n - i > 1)
+			rte_prefetch0(
+			    rte_pktmbuf_mtod(*(pkts + i + 1), volatile void *));
 		/* Should we enable HW CKSUM offload */
 		if (buf->ol_flags &
 		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
@@ -677,10 +673,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 					};
 					ds = 1;
 					total_length = 0;
-					pkts--;
-					pkts_n++;
-					elts_head = (elts_head - 1) &
-						    (elts_n - 1);
 					k++;
 					goto next_wqe;
 				}
@@ -813,14 +805,15 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			naddr,
 			naddr >> 32,
 		};
-		(*txq->elts)[elts_head] = buf;
 		elts_head = (elts_head + 1) & (elts_n - 1);
-		++j;
+		(*txq->elts)[elts_head] = buf;
+		++sg;
 		--segs_n;
+		/* Advance counter only if all segs are successfully posted. */
 		if (segs_n)
 			goto next_seg;
 		else
-			--pkts_n;
+			j += sg;
 next_pkt:
 		++i;
 		/* Initialize known and common part of the WQE structure. */
@@ -853,24 +846,24 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		}
 next_wqe:
 		txq->wqe_ci += (ds + 3) / 4;
+		/* Save the last successful WQE for completion request. */
+		last_wqe = (volatile struct mlx5_wqe_ctrl *)wqe;
 #ifdef MLX5_PMD_SOFT_COUNTERS
 		/* Increment sent bytes counter. */
 		txq->stats.obytes += total_length;
 #endif
-	} while (pkts_n);
+	} while (i < pkts_n);
 	/* Take a shortcut if nothing must be sent. */
 	if (unlikely((i + k) == 0))
 		return 0;
+	txq->elts_head = (txq->elts_head + i + j) & (elts_n - 1);
 	/* Check whether completion threshold has been reached. */
 	comp = txq->elts_comp + i + j + k;
 	if (comp >= MLX5_TX_COMP_THRESH) {
-		volatile struct mlx5_wqe_ctrl *w =
-			(volatile struct mlx5_wqe_ctrl *)wqe;
-
 		/* Request completion on last WQE. */
-		w->ctrl2 = htonl(8);
+		last_wqe->ctrl2 = htonl(8);
 		/* Save elts_head in unused "immediate" field of WQE. */
-		w->ctrl3 = elts_head;
+		last_wqe->ctrl3 = txq->elts_head;
 		txq->elts_comp = 0;
 	} else {
 		txq->elts_comp = comp;
@@ -880,8 +873,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	txq->stats.opackets += i;
 #endif
 	/* Ring QP doorbell. */
-	mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)wqe);
-	txq->elts_head = elts_head;
+	mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)last_wqe);
 	return i;
 }
 
-- 
2.11.0

Thread overview:
2017-05-06  1:20 Yongseok Koh [this message]
2017-05-09 20:49 ` [dpdk-dev] [PATCH v2 0/2] " Yongseok Koh
2017-05-09 20:49   ` [dpdk-dev] [PATCH v2 1/2] " Yongseok Koh
2017-05-09 20:49   ` [dpdk-dev] [PATCH v2 2/2] net/mlx5: change error-prone code on Tx path Yongseok Koh
2017-05-09 22:10   ` [dpdk-dev] [PATCH v2 0/2] net/mlx5: fix erroneous index handling for Tx ring Adrien Mazarguil
2017-05-10 16:06     ` Thomas Monjalon
