From: Yongseok Koh <yskoh@mellanox.com>
To: <ferruh.yigit@intel.com>
Cc: <dev@dpdk.org>, <adrien.mazarguil@6wind.com>,
<nelio.laranjeiro@6wind.com>, <hhaim@cisco.com>,
Yongseok Koh <yskoh@mellanox.com>
Subject: [dpdk-dev] [PATCH] net/mlx5: fix erroneous index handling for Tx ring
Date: Fri, 5 May 2017 18:20:18 -0700 [thread overview]
Message-ID: <20170506012018.18579-1-yskoh@mellanox.com> (raw)
In case of resource deficiency on Tx, mlx5_tx_burst() breaks the loop
without rolling back consumed resources (txq->wqes[] and txq->elts[]). This
can make application crash because unposted mbufs can be freed while
processing completions. In regard to this, some error-prone/redundant
indexing has been cleaned as well.
Reported-by: Hanoch Haim <hhaim@cisco.com>
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
drivers/net/mlx5/mlx5_rxtx.c | 50 +++++++++++++++++++-------------------------
1 file changed, 21 insertions(+), 29 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 6254228a9..d7176a422 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -493,7 +493,6 @@ uint16_t
mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
struct txq *txq = (struct txq *)dpdk_txq;
- uint16_t elts_head = txq->elts_head;
const unsigned int elts_n = 1 << txq->elts_n;
unsigned int i = 0;
unsigned int j = 0;
@@ -504,6 +503,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
uint16_t max_wqe;
unsigned int comp;
volatile struct mlx5_wqe_v *wqe = NULL;
+ volatile struct mlx5_wqe_ctrl *last_wqe = NULL;
unsigned int segs_n = 0;
struct rte_mbuf *buf = NULL;
uint8_t *raw;
@@ -514,7 +514,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
rte_prefetch0(*pkts);
/* Start processing. */
txq_complete(txq);
- max = (elts_n - (elts_head - txq->elts_tail));
+ max = (elts_n - (txq->elts_head - txq->elts_tail));
if (max > elts_n)
max -= elts_n;
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
@@ -524,8 +524,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
volatile rte_v128u32_t *dseg = NULL;
uint32_t length;
unsigned int ds = 0;
+ unsigned int sg = 0;
uintptr_t addr;
uint64_t naddr;
+ uint16_t elts_head = (txq->elts_head + i + j) & (elts_n - 1);
uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2;
uint16_t tso_header_sz = 0;
uint16_t ehdr;
@@ -536,7 +538,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
#endif
/* first_seg */
- buf = *(pkts++);
+ buf = *(pkts + i);
segs_n = buf->nb_segs;
/*
* Make sure there is enough room to store this packet and
@@ -547,15 +549,13 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
break;
max -= segs_n;
--segs_n;
- if (!segs_n)
- --pkts_n;
if (unlikely(--max_wqe == 0))
break;
wqe = (volatile struct mlx5_wqe_v *)
tx_mlx5_wqe(txq, txq->wqe_ci);
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
- if (pkts_n > 1)
- rte_prefetch0(*pkts);
+ if (pkts_n - i > 1)
+ rte_prefetch0(*(pkts + i + 1));
addr = rte_pktmbuf_mtod(buf, uintptr_t);
length = DATA_LEN(buf);
ehdr = (((uint8_t *)addr)[1] << 8) |
@@ -567,14 +567,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
break;
/* Update element. */
(*txq->elts)[elts_head] = buf;
- elts_head = (elts_head + 1) & (elts_n - 1);
/* Prefetch next buffer data. */
- if (pkts_n > 1) {
- volatile void *pkt_addr;
-
- pkt_addr = rte_pktmbuf_mtod(*pkts, volatile void *);
- rte_prefetch0(pkt_addr);
- }
+ if (pkts_n - i > 1)
+ rte_prefetch0(
+ rte_pktmbuf_mtod(*(pkts + i + 1), volatile void *));
/* Should we enable HW CKSUM offload */
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
@@ -677,10 +673,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
};
ds = 1;
total_length = 0;
- pkts--;
- pkts_n++;
- elts_head = (elts_head - 1) &
- (elts_n - 1);
k++;
goto next_wqe;
}
@@ -813,14 +805,15 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
naddr,
naddr >> 32,
};
- (*txq->elts)[elts_head] = buf;
elts_head = (elts_head + 1) & (elts_n - 1);
- ++j;
+ (*txq->elts)[elts_head] = buf;
+ ++sg;
--segs_n;
+ /* Advance counter only if all segs are successfully posted. */
if (segs_n)
goto next_seg;
else
- --pkts_n;
+ j += sg;
next_pkt:
++i;
/* Initialize known and common part of the WQE structure. */
@@ -853,24 +846,24 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
}
next_wqe:
txq->wqe_ci += (ds + 3) / 4;
+ /* Save the last successful WQE for completion request */
+ last_wqe = (volatile struct mlx5_wqe_ctrl *)wqe;
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment sent bytes counter. */
txq->stats.obytes += total_length;
#endif
- } while (pkts_n);
+ } while (i < pkts_n);
/* Take a shortcut if nothing must be sent. */
if (unlikely((i + k) == 0))
return 0;
+ txq->elts_head = (txq->elts_head + i + j) & (elts_n - 1);
/* Check whether completion threshold has been reached. */
comp = txq->elts_comp + i + j + k;
if (comp >= MLX5_TX_COMP_THRESH) {
- volatile struct mlx5_wqe_ctrl *w =
- (volatile struct mlx5_wqe_ctrl *)wqe;
-
/* Request completion on last WQE. */
- w->ctrl2 = htonl(8);
+ last_wqe->ctrl2 = htonl(8);
/* Save elts_head in unused "immediate" field of WQE. */
- w->ctrl3 = elts_head;
+ last_wqe->ctrl3 = txq->elts_head;
txq->elts_comp = 0;
} else {
txq->elts_comp = comp;
@@ -880,8 +873,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
txq->stats.opackets += i;
#endif
/* Ring QP doorbell. */
- mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)wqe);
- txq->elts_head = elts_head;
+ mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)last_wqe);
return i;
}
--
2.11.0
next reply other threads:[~2017-05-06 1:20 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-05-06 1:20 Yongseok Koh [this message]
2017-05-09 20:49 ` [dpdk-dev] [PATCH v2 0/2] " Yongseok Koh
2017-05-09 20:49 ` [dpdk-dev] [PATCH v2 1/2] " Yongseok Koh
2017-05-09 20:49 ` [dpdk-dev] [PATCH v2 2/2] net/mlx5: change error-prone code on Tx path Yongseok Koh
2017-05-09 22:10 ` [dpdk-dev] [PATCH v2 0/2] net/mlx5: fix erroneous index handling for Tx ring Adrien Mazarguil
2017-05-10 16:06 ` Thomas Monjalon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170506012018.18579-1-yskoh@mellanox.com \
--to=yskoh@mellanox.com \
--cc=adrien.mazarguil@6wind.com \
--cc=dev@dpdk.org \
--cc=ferruh.yigit@intel.com \
--cc=hhaim@cisco.com \
--cc=nelio.laranjeiro@6wind.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).