* [dpdk-stable] [PATCH 2/3] net/mlx5: fix Tx WQE corruption caused by starvation
[not found] <fb871683b18cb5a8db2145ea71f0d9a41d6f8683.1486031307.git.nelio.laranjeiro@6wind.com>
@ 2017-02-02 10:34 ` Nelio Laranjeiro
2017-02-02 10:34 ` [dpdk-stable] [PATCH 3/3] net/mlx5: fix inline WQE consumption Nelio Laranjeiro
1 sibling, 0 replies; 2+ messages in thread
From: Nelio Laranjeiro @ 2017-02-02 10:34 UTC (permalink / raw)
To: dev; +Cc: Adrien Mazarguil, stable
Fixes an issue which may occurs with the inline feature activated and a
packet greater than the max_inline requested.
In such situation, more work request elements can be consumed and in the
worst case override some still handled by the NIC, this can result in
sending garbage on the network or putting the work queue in error.
Fixes: 2a66cf378954 ("net/mlx5: support inline send")
Cc: stable@dpdk.org
Reported-by: Elad Persiko <eladpe@mellanox.com>
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
drivers/net/mlx5/mlx5_rxtx.c | 91 +++++++++++++++++++++++++++++++++++++-------
drivers/net/mlx5/mlx5_rxtx.h | 1 +
2 files changed, 78 insertions(+), 14 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ffca21c..a0e15ac 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -238,8 +238,9 @@ txq_complete(struct txq *txq)
} while (1);
if (unlikely(cqe == NULL))
return;
+ txq->wqe_pi = ntohs(cqe->wqe_counter);
ctrl = (volatile struct mlx5_wqe_ctrl *)
- tx_mlx5_wqe(txq, ntohs(cqe->wqe_counter));
+ tx_mlx5_wqe(txq, txq->wqe_pi);
elts_tail = ctrl->ctrl3;
assert(elts_tail < (1 << txq->wqe_n));
/* Free buffers. */
@@ -365,6 +366,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
unsigned int i = 0;
unsigned int j = 0;
unsigned int max;
+ uint16_t max_wqe;
unsigned int comp;
volatile struct mlx5_wqe_v *wqe = NULL;
unsigned int segs_n = 0;
@@ -380,6 +382,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
max = (elts_n - (elts_head - txq->elts_tail));
if (max > elts_n)
max -= elts_n;
+ max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
+ if (unlikely(!max_wqe))
+ return 0;
do {
volatile rte_v128u32_t *dseg = NULL;
uint32_t length;
@@ -407,6 +412,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
--segs_n;
if (!segs_n)
--pkts_n;
+ if (unlikely(--max_wqe == 0))
+ break;
wqe = (volatile struct mlx5_wqe_v *)
tx_mlx5_wqe(txq, txq->wqe_ci);
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
@@ -476,10 +483,19 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (room > max_inline) {
uintptr_t addr_end = (addr + max_inline) &
~(RTE_CACHE_LINE_SIZE - 1);
- uint16_t copy_b = ((addr_end - addr) > length) ?
- length :
- (addr_end - addr);
+ unsigned int copy_b =
+ RTE_MIN((addr_end - addr), length);
+ uint16_t n;
+ /*
+ * One Dseg remains in the current WQE. To
+ * keep the computation positive, it is
+ * removed after the bytes to Dseg conversion.
+ */
+ n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
+ if (unlikely(max_wqe < n))
+ break;
+ max_wqe -= n;
rte_memcpy((void *)raw, (void *)addr, copy_b);
addr += copy_b;
length -= copy_b;
@@ -493,12 +509,18 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
*/
ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
if (length > 0) {
- dseg = (volatile rte_v128u32_t *)
- ((uintptr_t)wqe +
- (ds * MLX5_WQE_DWORD_SIZE));
- if ((uintptr_t)dseg >= end)
+ if (ds % (MLX5_WQE_SIZE /
+ MLX5_WQE_DWORD_SIZE) == 0) {
+ if (unlikely(--max_wqe == 0))
+ break;
+ dseg = (volatile rte_v128u32_t *)
+ tx_mlx5_wqe(txq, txq->wqe_ci +
+ ds / 4);
+ } else {
dseg = (volatile rte_v128u32_t *)
- txq->wqes;
+ ((uintptr_t)wqe +
+ (ds * MLX5_WQE_DWORD_SIZE));
+ }
goto use_dseg;
} else if (!segs_n) {
goto next_pkt;
@@ -541,12 +563,12 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
*/
assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) {
- unsigned int n = (txq->wqe_ci + ((ds + 3) / 4)) &
- ((1 << txq->wqe_n) - 1);
-
+ if (unlikely(--max_wqe == 0))
+ break;
dseg = (volatile rte_v128u32_t *)
- tx_mlx5_wqe(txq, n);
- rte_prefetch0(tx_mlx5_wqe(txq, n + 1));
+ tx_mlx5_wqe(txq, txq->wqe_ci + ds / 4);
+ rte_prefetch0(tx_mlx5_wqe(txq,
+ txq->wqe_ci + ds / 4 + 1));
} else {
++dseg;
}
@@ -711,6 +733,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
unsigned int i = 0;
unsigned int j = 0;
unsigned int max;
+ uint16_t max_wqe;
unsigned int comp;
struct mlx5_mpw mpw = {
.state = MLX5_MPW_STATE_CLOSED,
@@ -726,6 +749,9 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
max = (elts_n - (elts_head - txq->elts_tail));
if (max > elts_n)
max -= elts_n;
+ max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
+ if (unlikely(!max_wqe))
+ return 0;
do {
struct rte_mbuf *buf = *(pkts++);
unsigned int elts_head_next;
@@ -759,6 +785,14 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
(mpw.wqe->eseg.cs_flags != cs_flags)))
mlx5_mpw_close(txq, &mpw);
if (mpw.state == MLX5_MPW_STATE_CLOSED) {
+ /*
+ * Multi-Packet WQE consumes at most two WQE.
+ * mlx5_mpw_new() expects to be able to use such
+ * resources.
+ */
+ if (unlikely(max_wqe < 2))
+ break;
+ max_wqe -= 2;
mlx5_mpw_new(txq, &mpw, length);
mpw.wqe->eseg.cs_flags = cs_flags;
}
@@ -914,11 +948,24 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
unsigned int i = 0;
unsigned int j = 0;
unsigned int max;
+ uint16_t max_wqe;
unsigned int comp;
unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE;
struct mlx5_mpw mpw = {
.state = MLX5_MPW_STATE_CLOSED,
};
+ /*
+ * Compute the maximum number of WQE which can be consumed by inline
+ * code.
+ * - 2 DSEG for:
+ * - 1 control segment,
+ * - 1 Ethernet segment,
+ * - N Dseg from the inline request.
+ */
+ const unsigned int wqe_inl_n =
+ ((2 * MLX5_WQE_DWORD_SIZE +
+ txq->max_inline * RTE_CACHE_LINE_SIZE) +
+ RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
if (unlikely(!pkts_n))
return 0;
@@ -950,6 +997,11 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
break;
max -= segs_n;
--pkts_n;
+ /*
+ * Compute max_wqe in case less WQE were consumed in previous
+ * iteration.
+ */
+ max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
/* Should we enable HW CKSUM offload */
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
@@ -975,9 +1027,20 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
if (mpw.state == MLX5_MPW_STATE_CLOSED) {
if ((segs_n != 1) ||
(length > inline_room)) {
+ /*
+ * Multi-Packet WQE consumes at most two WQE.
+ * mlx5_mpw_new() expects to be able to use
+ * such resources.
+ */
+ if (unlikely(max_wqe < 2))
+ break;
+ max_wqe -= 2;
mlx5_mpw_new(txq, &mpw, length);
mpw.wqe->eseg.cs_flags = cs_flags;
} else {
+ if (unlikely(max_wqe < wqe_inl_n))
+ break;
+ max_wqe -= wqe_inl_n;
mlx5_mpw_inline_new(txq, &mpw, length);
mpw.wqe->eseg.cs_flags = cs_flags;
}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 302ca49..41a34d7 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -249,6 +249,7 @@ struct txq {
uint16_t elts_comp; /* Counter since last completion request. */
uint16_t cq_ci; /* Consumer index for completion queue. */
uint16_t wqe_ci; /* Consumer index for work queue. */
+ uint16_t wqe_pi; /* Producer index for work queue. */
uint16_t elts_n:4; /* (*elts)[] length (in log2). */
uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
uint16_t wqe_n:4; /* Number of of WQ elements (in log2). */
--
2.1.4
^ permalink raw reply [flat|nested] 2+ messages in thread
* [dpdk-stable] [PATCH 3/3] net/mlx5: fix inline WQE consumption
[not found] <fb871683b18cb5a8db2145ea71f0d9a41d6f8683.1486031307.git.nelio.laranjeiro@6wind.com>
2017-02-02 10:34 ` [dpdk-stable] [PATCH 2/3] net/mlx5: fix Tx WQE corruption caused by starvation Nelio Laranjeiro
@ 2017-02-02 10:34 ` Nelio Laranjeiro
1 sibling, 0 replies; 2+ messages in thread
From: Nelio Laranjeiro @ 2017-02-02 10:34 UTC (permalink / raw)
To: dev; +Cc: Adrien Mazarguil, stable, Yongseok Koh
For some sizes of packets, the number of bytes copied in the work queue
element could be greater than the available size of the inline. In such
situation it could consumed one more work queue element where it should
not.
Fixes: 0e8679fcddc4 ("net/mlx5: fix inline logic")
Cc: stable@dpdk.org
Reported-by: Elad Persiko <eladpe@mellanox.com>
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5_rxtx.c | 31 ++++++++++++-------------------
1 file changed, 12 insertions(+), 19 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index a0e15ac..40f2c47 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -466,33 +466,28 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
addr += pkt_inline_sz;
}
/* Inline if enough room. */
- if (txq->max_inline != 0) {
+ if (txq->max_inline) {
uintptr_t end = (uintptr_t)
(((uintptr_t)txq->wqes) +
(1 << txq->wqe_n) * MLX5_WQE_SIZE);
- uint16_t max_inline =
- txq->max_inline * RTE_CACHE_LINE_SIZE;
- uint16_t room;
+ unsigned int max_inline = txq->max_inline *
+ RTE_CACHE_LINE_SIZE -
+ MLX5_WQE_DWORD_SIZE;
+ uintptr_t addr_end = (addr + max_inline) &
+ ~(RTE_CACHE_LINE_SIZE - 1);
+ unsigned int copy_b = (addr_end > addr) ?
+ RTE_MIN((addr_end - addr), length) :
+ 0;
- /*
- * raw starts two bytes before the boundary to
- * continue the above copy of packet data.
- */
raw += MLX5_WQE_DWORD_SIZE;
- room = end - (uintptr_t)raw;
- if (room > max_inline) {
- uintptr_t addr_end = (addr + max_inline) &
- ~(RTE_CACHE_LINE_SIZE - 1);
- unsigned int copy_b =
- RTE_MIN((addr_end - addr), length);
- uint16_t n;
-
+ if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
/*
* One Dseg remains in the current WQE. To
* keep the computation positive, it is
* removed after the bytes to Dseg conversion.
*/
- n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
+ uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
+
if (unlikely(max_wqe < n))
break;
max_wqe -= n;
@@ -500,8 +495,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
addr += copy_b;
length -= copy_b;
pkt_inline_sz += copy_b;
- /* Sanity check. */
- assert(addr <= addr_end);
}
/*
* 2 DWORDs consumed by the WQE header + ETH segment +
--
2.1.4
^ permalink raw reply [flat|nested] 2+ messages in thread