* [dpdk-stable] [PATCH 2/2] net/mlx5: enforce Tx num of segments limitation
2017-08-23 7:33 [dpdk-stable] [PATCH 1/2] net/mlx5: fix num seg assumption on vPMD Shahaf Shuler
@ 2017-08-23 7:33 ` Shahaf Shuler
2017-08-24 13:28 ` Nélio Laranjeiro
2017-08-24 13:23 ` [dpdk-stable] [PATCH 1/2] net/mlx5: fix num seg assumption on vPMD Nélio Laranjeiro
` (2 subsequent siblings)
3 siblings, 1 reply; 29+ messages in thread
From: Shahaf Shuler @ 2017-08-23 7:33 UTC (permalink / raw)
To: nelio.laranjeiro, adrien.mazarguil; +Cc: dev, yskoh, stable
Mellanox NICs has a limitation on the number of mbuf segments a multi
segment mbuf can have. The max number depends on the Tx offloads requested.
The current code not enforce such limitation, which might cause
malformed WQEs to be written to the device.
This commit adds verification for the number of mbuf segments posted
to the device. In case of overflow the packet will not be sent.
Debug prints were added to help application identify the cause for such
case.
Cc: stable@dpdk.org
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
---
This patch should be applied only after the series:
http://dpdk.org/dev/patchwork/patch/27367/
---
drivers/net/mlx5/mlx5_defs.h | 3 ++-
drivers/net/mlx5/mlx5_prm.h | 3 +++
drivers/net/mlx5/mlx5_rxtx.c | 30 +++++++++++++++++++++++++++---
drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 8 ++++++++
drivers/net/mlx5/mlx5_txq.c | 27 +++++++++++++++++++++++++++
5 files changed, 67 insertions(+), 4 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index a76bc6f65..7d2a65099 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -100,7 +100,8 @@
/*
* Maximum size of burst for vectorized Tx. This is related to the maximum size
- * of Enhaned MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * of Enhacned MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * Careful when changing, large value can cause wqe DS to overlap.
*/
#define MLX5_VPMD_TX_MAX_BURST 32U
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 608072f7e..87244e7db 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -154,6 +154,9 @@
/* Default mark value used when none is provided. */
#define MLX5_FLOW_MARK_DEFAULT 0xffffff
+/* Maximum number of DS in WQE. */
+#define MLX5_MAX_DS (63)
+
/* Subset of struct mlx5_wqe_eth_seg. */
struct mlx5_wqe_eth_seg_small {
uint32_t rsvd0;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index b07bcd118..77a789c2c 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -664,6 +664,15 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
else
j += sg;
next_pkt:
+ if (ds > MLX5_MAX_DS) {
+#ifndef NDEBUG
+ WARN("Cannot send packet %p with %d segments "
+ "wqe.ds = %d, wqe.inline_sz = %d",
+ (void *)pkts, (*pkts)->nb_segs, ds,
+ pkt_inline_sz);
+#endif
+ break;
+ }
++elts_head;
++pkts;
++i;
@@ -850,8 +859,13 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (max_elts < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+#ifndef NDEBUG
+ WARN("Cannot send packet %p with %d segments",
+ (void *)buf, segs_n);
+#endif
break;
+ }
max_elts -= segs_n;
--pkts_n;
/* Should we enable HW CKSUM offload */
@@ -1071,8 +1085,13 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
if (max_elts < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+#ifndef NDEBUG
+ WARN("Cannot send packet %p with %d segments",
+ (void *)buf, segs_n);
+#endif
break;
+ }
max_elts -= segs_n;
--pkts_n;
/*
@@ -1360,8 +1379,13 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (max_elts - j < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+#ifndef NDEBUG
+ WARN("Cannot send packet %p with %d segments",
+ (void *)buf, segs_n);
+#endif
break;
+ }
/* Should we enable HW CKSUM offload. */
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index 30727e6dd..6f66517df 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -257,6 +257,13 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (segs_n == 1 ||
max_elts < segs_n || max_wqe < 2)
break;
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+#ifndef NDEBUG
+ WARN("Cannot send packet %p with %d segments",
+ (void *)buf, segs_n);
+#endif
+ break;
+ }
wqe = &((volatile struct mlx5_wqe64 *)
txq->wqes)[wqe_ci & wq_mask].hdr;
if (buf->ol_flags &
@@ -374,6 +381,7 @@ txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
max_elts = (elts_n - (elts_head - txq->elts_tail));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
+ assert(pkts_n <= MLX5_MAX_DS - nb_dword_in_hdr);
if (unlikely(!pkts_n))
return 0;
elts = &(*txq->elts)[elts_head & elts_m];
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 98aaa7ca0..6ea02485f 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -295,6 +295,8 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
.comp_mask = IBV_EXP_QP_INIT_ATTR_PD,
};
if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+ unsigned int ds_cnt;
+
tmpl.txq.max_inline =
((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
RTE_CACHE_LINE_SIZE);
@@ -327,6 +329,31 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
attr.init.cap.max_inline_data =
tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
}
+ /*
+ * Check if the inline size is too large in a way which
+ * can make the wqe DS to overflow.
+ * Considering in calculation:
+ * WQE CTRL (1 DS)
+ * WQE ETH (1 DS)
+ * inline part (N DS)
+ */
+ ds_cnt = 2 +
+ (attr.init.cap.max_inline_data / MLX5_WQE_DWORD_SIZE);
+ if (ds_cnt > MLX5_MAX_DS) {
+ unsigned int max_inline = (MLX5_MAX_DS - 2) *
+ MLX5_WQE_DWORD_SIZE;
+
+ /* Ceil down*/
+ max_inline = max_inline - (max_inline %
+ RTE_CACHE_LINE_SIZE);
+ WARN("txq inline is too large (%d) setting it to "
+ "the maximum possible: %d\n",
+ priv->txq_inline, max_inline);
+ tmpl.txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
+ attr.init.cap.max_inline_data = max_inline;
+ if (priv->mps == MLX5_MPW_ENHANCED)
+ tmpl.txq.inline_max_packet_sz = max_inline;
+ }
}
if (priv->tso) {
attr.init.max_tso_header =
--
2.12.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [dpdk-stable] [PATCH 2/2] net/mlx5: enforce Tx num of segments limitation
2017-08-23 7:33 ` [dpdk-stable] [PATCH 2/2] net/mlx5: enforce Tx num of segments limitation Shahaf Shuler
@ 2017-08-24 13:28 ` Nélio Laranjeiro
0 siblings, 0 replies; 29+ messages in thread
From: Nélio Laranjeiro @ 2017-08-24 13:28 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: adrien.mazarguil, dev, yskoh, stable
On Wed, Aug 23, 2017 at 10:33:58AM +0300, Shahaf Shuler wrote:
> Mellanox NICs has a limitation on the number of mbuf segments a multi
> segment mbuf can have. The max number depends on the Tx offloads requested.
>
> The current code not enforce such limitation, which might cause
> malformed WQEs to be written to the device.
Avoid acronyms in the commit message (at least on first occurrence), not all
people knows what a WQE is and getting such information is not easy.
> This commit adds verification for the number of mbuf segments posted
> to the device. In case of overflow the packet will not be sent.
> Debug prints were added to help application identify the cause for such
> case.
>
> Cc: stable@dpdk.org
>
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> ---
>
> This patch should be applied only after the series:
> http://dpdk.org/dev/patchwork/patch/27367/
>
> ---
> drivers/net/mlx5/mlx5_defs.h | 3 ++-
> drivers/net/mlx5/mlx5_prm.h | 3 +++
> drivers/net/mlx5/mlx5_rxtx.c | 30 +++++++++++++++++++++++++++---
> drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 8 ++++++++
> drivers/net/mlx5/mlx5_txq.c | 27 +++++++++++++++++++++++++++
> 5 files changed, 67 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
> index 608072f7e..87244e7db 100644
> --- a/drivers/net/mlx5/mlx5_prm.h
> +++ b/drivers/net/mlx5/mlx5_prm.h
> @@ -154,6 +154,9 @@
> /* Default mark value used when none is provided. */
> #define MLX5_FLOW_MARK_DEFAULT 0xffffff
>
> +/* Maximum number of DS in WQE. */
> +#define MLX5_MAX_DS (63)
> +
Why the parenthesis?
Thanks,
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [dpdk-stable] [PATCH 1/2] net/mlx5: fix num seg assumption on vPMD
2017-08-23 7:33 [dpdk-stable] [PATCH 1/2] net/mlx5: fix num seg assumption on vPMD Shahaf Shuler
2017-08-23 7:33 ` [dpdk-stable] [PATCH 2/2] net/mlx5: enforce Tx num of segments limitation Shahaf Shuler
@ 2017-08-24 13:23 ` Nélio Laranjeiro
2017-08-30 7:07 ` [dpdk-stable] [PATCH v2 " Shahaf Shuler
2017-08-30 7:07 ` [dpdk-stable] [PATCH v2 2/2] " Shahaf Shuler
3 siblings, 0 replies; 29+ messages in thread
From: Nélio Laranjeiro @ 2017-08-24 13:23 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: adrien.mazarguil, dev, yskoh, stable
On Wed, Aug 23, 2017 at 10:33:57AM +0300, Shahaf Shuler wrote:
> vPMD Tx function assumes that after the scatter of the
> multi-segment packets the next packet will be a single segment packet.
>
> This is not current as the function can return due to lack of resources
> without sending all of the multi-segment mbufs sequence.
>
> Fixes: 6cb559d67b83 ("net/mlx5: add vectorized Rx/Tx burst for x86")
> Cc: stable@dpdk.org
>
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> ---
>
> This patch should be applied only after the series:
> http://dpdk.org/dev/patchwork/patch/27367/
>
> ---
> drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 8 +++++---
> 1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
> index 8560f745a..30727e6dd 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
> @@ -119,8 +119,7 @@ txq_wr_dseg_v(struct txq *txq, __m128i *dseg,
> }
>
> /**
> - * Count the number of continuous single segment packets. The first packet must
> - * be a single segment packet.
> + * Count the number of continuous single segment packets.
> *
> * @param pkts
> * Pointer to array of packets.
> @@ -137,7 +136,8 @@ txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
>
> if (!pkts_n)
> return 0;
> - assert(NB_SEGS(pkts[0]) == 1);
> + if (NB_SEGS(pkts[0]) > 1)
> + return 0;
> /* Count the number of continuous single segment packets. */
> for (pos = 1; pos < pkts_n; ++pos)
> if (NB_SEGS(pkts[pos]) > 1)
> @@ -502,6 +502,8 @@ mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
> n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
> if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS))
> n = txq_check_multiseg(&pkts[nb_tx], n);
> + if (!n)
> + break;
> if (!(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
> n = txq_calc_offload(txq, &pkts[nb_tx], n, &cs_flags);
> ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
> --
> 2.12.0
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 29+ messages in thread
* [dpdk-stable] [PATCH v2 1/2] net/mlx5: fix num seg assumption on vPMD
2017-08-23 7:33 [dpdk-stable] [PATCH 1/2] net/mlx5: fix num seg assumption on vPMD Shahaf Shuler
2017-08-23 7:33 ` [dpdk-stable] [PATCH 2/2] net/mlx5: enforce Tx num of segments limitation Shahaf Shuler
2017-08-24 13:23 ` [dpdk-stable] [PATCH 1/2] net/mlx5: fix num seg assumption on vPMD Nélio Laranjeiro
@ 2017-08-30 7:07 ` Shahaf Shuler
2017-09-04 14:57 ` Nélio Laranjeiro
` (3 more replies)
2017-08-30 7:07 ` [dpdk-stable] [PATCH v2 2/2] " Shahaf Shuler
3 siblings, 4 replies; 29+ messages in thread
From: Shahaf Shuler @ 2017-08-30 7:07 UTC (permalink / raw)
To: nelio.laranjeiro, adrien.mazarguil; +Cc: dev, stable
vPMD Tx function assumes that after the scatter of the
multi-segment packets the next packet will be a single segment packet.
This is not current as the function can return due to lack of resources
without sending all of the multi-segment mbufs sequence.
Fixes: 6cb559d67b83 ("net/mlx5: add vectorized Rx/Tx burst for x86")
Cc: stable@dpdk.org
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
This patch should be applied only after the series:
http://dpdk.org/dev/patchwork/patch/27367/
on v2:
- different approach to fix the issue.
---
drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index 37854a73b..f89762ff8 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -112,8 +112,7 @@ txq_wr_dseg_v(struct txq *txq, __m128i *dseg,
}
/**
- * Count the number of continuous single segment packets. The first packet must
- * be a single segment packet.
+ * Count the number of continuous single segment packets.
*
* @param pkts
* Pointer to array of packets.
@@ -130,9 +129,8 @@ txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
if (!pkts_n)
return 0;
- assert(NB_SEGS(pkts[0]) == 1);
/* Count the number of continuous single segment packets. */
- for (pos = 1; pos < pkts_n; ++pos)
+ for (pos = 0; pos < pkts_n; ++pos)
if (NB_SEGS(pkts[pos]) > 1)
break;
return pos;
--
2.12.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [dpdk-stable] [PATCH v2 1/2] net/mlx5: fix num seg assumption on vPMD
2017-08-30 7:07 ` [dpdk-stable] [PATCH v2 " Shahaf Shuler
@ 2017-09-04 14:57 ` Nélio Laranjeiro
2017-09-11 12:50 ` [dpdk-stable] [PATCH v3 1/3] " Shahaf Shuler
` (2 subsequent siblings)
3 siblings, 0 replies; 29+ messages in thread
From: Nélio Laranjeiro @ 2017-09-04 14:57 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: adrien.mazarguil, dev, stable
On Wed, Aug 30, 2017 at 10:07:07AM +0300, Shahaf Shuler wrote:
> vPMD Tx function assumes that after the scatter of the
> multi-segment packets the next packet will be a single segment packet.
>
> This is not current as the function can return due to lack of resources
> without sending all of the multi-segment mbufs sequence.
>
> Fixes: 6cb559d67b83 ("net/mlx5: add vectorized Rx/Tx burst for x86")
> Cc: stable@dpdk.org
>
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> Acked-by: Yongseok Koh <yskoh@mellanox.com>
> ---
> This patch should be applied only after the series:
> http://dpdk.org/dev/patchwork/patch/27367/
>
> on v2:
> - different approach to fix the issue.
> ---
> drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 6 ++----
> 1 file changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
> index 37854a73b..f89762ff8 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
> @@ -112,8 +112,7 @@ txq_wr_dseg_v(struct txq *txq, __m128i *dseg,
> }
>
> /**
> - * Count the number of continuous single segment packets. The first packet must
> - * be a single segment packet.
> + * Count the number of continuous single segment packets.
> *
> * @param pkts
> * Pointer to array of packets.
> @@ -130,9 +129,8 @@ txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
>
> if (!pkts_n)
> return 0;
> - assert(NB_SEGS(pkts[0]) == 1);
> /* Count the number of continuous single segment packets. */
> - for (pos = 1; pos < pkts_n; ++pos)
> + for (pos = 0; pos < pkts_n; ++pos)
> if (NB_SEGS(pkts[pos]) > 1)
> break;
> return pos;
> --
> 2.12.0
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 29+ messages in thread
* [dpdk-stable] [PATCH v3 1/3] net/mlx5: fix num seg assumption on vPMD
2017-08-30 7:07 ` [dpdk-stable] [PATCH v2 " Shahaf Shuler
2017-09-04 14:57 ` Nélio Laranjeiro
@ 2017-09-11 12:50 ` Shahaf Shuler
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 1/4] " Shahaf Shuler
` (3 more replies)
2017-09-11 12:50 ` [dpdk-stable] [PATCH v3 2/3] net/mlx5: fix Tx stats error counter Shahaf Shuler
2017-09-11 12:50 ` [dpdk-stable] [PATCH v3 3/3] net/mlx5: enforce Tx num of segments limitation Shahaf Shuler
3 siblings, 4 replies; 29+ messages in thread
From: Shahaf Shuler @ 2017-09-11 12:50 UTC (permalink / raw)
To: nelio.laranjeiro; +Cc: dev, stable, yskoh
vPMD Tx function assumes that after the scatter of the
multi-segment packets the next packet will be a single segment packet.
This is not current as the function can return due to lack of resources
without sending all of the multi-segment mbufs sequence.
Fixes: 6cb559d67b83 ("net/mlx5: add vectorized Rx/Tx burst for x86")
Cc: stable@dpdk.org
Cc: yskoh@mellanox.com
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index 37854a73b..f89762ff8 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -112,8 +112,7 @@ txq_wr_dseg_v(struct txq *txq, __m128i *dseg,
}
/**
- * Count the number of continuous single segment packets. The first packet must
- * be a single segment packet.
+ * Count the number of continuous single segment packets.
*
* @param pkts
* Pointer to array of packets.
@@ -130,9 +129,8 @@ txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
if (!pkts_n)
return 0;
- assert(NB_SEGS(pkts[0]) == 1);
/* Count the number of continuous single segment packets. */
- for (pos = 1; pos < pkts_n; ++pos)
+ for (pos = 0; pos < pkts_n; ++pos)
if (NB_SEGS(pkts[pos]) > 1)
break;
return pos;
--
2.12.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* [dpdk-stable] [PATCH v4 1/4] net/mlx5: fix num seg assumption on vPMD
2017-09-11 12:50 ` [dpdk-stable] [PATCH v3 1/3] " Shahaf Shuler
@ 2017-09-13 10:50 ` Shahaf Shuler
2017-09-14 10:50 ` [dpdk-stable] [PATCH v5 " Shahaf Shuler
` (3 more replies)
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 2/4] net/mlx5: fix Tx stats error counter definition Shahaf Shuler
` (2 subsequent siblings)
3 siblings, 4 replies; 29+ messages in thread
From: Shahaf Shuler @ 2017-09-13 10:50 UTC (permalink / raw)
To: nelio.laranjeiro, yskoh, adrien.mazarguil; +Cc: dev, stable
vPMD Tx function assumes that after the scatter of the
multi-segment packets the next packet will be a single segment packet.
This is not current as the function can return due to lack of resources
without sending all of the multi-segment mbufs sequence.
Fixes: 6cb559d67b83 ("net/mlx5: add vectorized Rx/Tx burst for x86")
Cc: stable@dpdk.org
Cc: yskoh@mellanox.com
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index 37854a73b..f89762ff8 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -112,8 +112,7 @@ txq_wr_dseg_v(struct txq *txq, __m128i *dseg,
}
/**
- * Count the number of continuous single segment packets. The first packet must
- * be a single segment packet.
+ * Count the number of continuous single segment packets.
*
* @param pkts
* Pointer to array of packets.
@@ -130,9 +129,8 @@ txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
if (!pkts_n)
return 0;
- assert(NB_SEGS(pkts[0]) == 1);
/* Count the number of continuous single segment packets. */
- for (pos = 1; pos < pkts_n; ++pos)
+ for (pos = 0; pos < pkts_n; ++pos)
if (NB_SEGS(pkts[pos]) > 1)
break;
return pos;
--
2.12.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* [dpdk-stable] [PATCH v5 1/4] net/mlx5: fix num seg assumption on vPMD
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 1/4] " Shahaf Shuler
@ 2017-09-14 10:50 ` Shahaf Shuler
2017-09-15 10:51 ` Ferruh Yigit
2017-09-14 10:50 ` [dpdk-stable] [PATCH v5 2/4] net/mlx5: fix Tx stats error counter definition Shahaf Shuler
` (2 subsequent siblings)
3 siblings, 1 reply; 29+ messages in thread
From: Shahaf Shuler @ 2017-09-14 10:50 UTC (permalink / raw)
To: nelio.laranjeiro, yskoh, adrien.mazarguil; +Cc: dev, stable
vPMD Tx function assumes that after the scatter of the
multi-segment packets the next packet will be a single segment packet.
This is not current as the function can return due to lack of resources
without sending all of the multi-segment mbufs sequence.
Fixes: 6cb559d67b83 ("net/mlx5: add vectorized Rx/Tx burst for x86")
Cc: stable@dpdk.org
Cc: yskoh@mellanox.com
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index 37854a73b..f89762ff8 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -112,8 +112,7 @@ txq_wr_dseg_v(struct txq *txq, __m128i *dseg,
}
/**
- * Count the number of continuous single segment packets. The first packet must
- * be a single segment packet.
+ * Count the number of continuous single segment packets.
*
* @param pkts
* Pointer to array of packets.
@@ -130,9 +129,8 @@ txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
if (!pkts_n)
return 0;
- assert(NB_SEGS(pkts[0]) == 1);
/* Count the number of continuous single segment packets. */
- for (pos = 1; pos < pkts_n; ++pos)
+ for (pos = 0; pos < pkts_n; ++pos)
if (NB_SEGS(pkts[pos]) > 1)
break;
return pos;
--
2.12.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [dpdk-stable] [PATCH v5 1/4] net/mlx5: fix num seg assumption on vPMD
2017-09-14 10:50 ` [dpdk-stable] [PATCH v5 " Shahaf Shuler
@ 2017-09-15 10:51 ` Ferruh Yigit
0 siblings, 0 replies; 29+ messages in thread
From: Ferruh Yigit @ 2017-09-15 10:51 UTC (permalink / raw)
To: Shahaf Shuler, nelio.laranjeiro, yskoh, adrien.mazarguil; +Cc: dev, stable
On 9/14/2017 11:50 AM, Shahaf Shuler wrote:
> vPMD Tx function assumes that after the scatter of the
> multi-segment packets the next packet will be a single segment packet.
>
> This is not current as the function can return due to lack of resources
> without sending all of the multi-segment mbufs sequence.
>
> Fixes: 6cb559d67b83 ("net/mlx5: add vectorized Rx/Tx burst for x86")
> Cc: stable@dpdk.org
> Cc: yskoh@mellanox.com
>
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> Acked-by: Yongseok Koh <yskoh@mellanox.com>
> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Series applied to dpdk-next-net/master, thanks.
^ permalink raw reply [flat|nested] 29+ messages in thread
* [dpdk-stable] [PATCH v5 2/4] net/mlx5: fix Tx stats error counter definition
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 1/4] " Shahaf Shuler
2017-09-14 10:50 ` [dpdk-stable] [PATCH v5 " Shahaf Shuler
@ 2017-09-14 10:50 ` Shahaf Shuler
2017-09-14 10:50 ` [dpdk-stable] [PATCH v5 3/4] net/mlx5: fix Tx stats error counter logic Shahaf Shuler
2017-09-14 10:50 ` [dpdk-stable] [PATCH v5 4/4] net/mlx5: enforce Tx num of segments limitation Shahaf Shuler
3 siblings, 0 replies; 29+ messages in thread
From: Shahaf Shuler @ 2017-09-14 10:50 UTC (permalink / raw)
To: nelio.laranjeiro, yskoh, adrien.mazarguil; +Cc: dev, stable
The current Tx error counter counts, according to its description,
the total number of packets not sent when TX ring full. It is reported
to application as part of oerrors field.
The drop due to full ring is not the statistic that should be set on
oerrors field. Such number can be counted by the application using the
return value of the Tx burst function.
The number that should be set there is the number of packets the device
could not transmit in any way, even when it has resources.
Therefore, replace this counter to count the total number of failed
transmitted packets.
Fixes: 87011737b715 ("mlx5: add software counters")
Cc: stable@dpdk.org
Cc: adrien.mazarguil@6wind.com
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
on v4:
- Split fix Tx stats commit into 2 seperate patches.
---
drivers/net/mlx5/mlx5_rxtx.h | 2 +-
drivers/net/mlx5/mlx5_stats.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 033e70f25..107ada0f5 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -75,7 +75,7 @@ struct mlx5_txq_stats {
uint64_t opackets; /**< Total of successfully sent packets. */
uint64_t obytes; /**< Total of successfully sent bytes. */
#endif
- uint64_t odropped; /**< Total of packets not sent when TX ring full. */
+ uint64_t oerrors; /**< Total number of failed transmitted packets. */
};
/* Flow director queue structure. */
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index d443e1336..06348c8a1 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -360,13 +360,13 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
tmp.q_opackets[idx] += txq->stats.opackets;
tmp.q_obytes[idx] += txq->stats.obytes;
#endif
- tmp.q_errors[idx] += txq->stats.odropped;
+ tmp.q_errors[idx] += txq->stats.oerrors;
}
#ifdef MLX5_PMD_SOFT_COUNTERS
tmp.opackets += txq->stats.opackets;
tmp.obytes += txq->stats.obytes;
#endif
- tmp.oerrors += txq->stats.odropped;
+ tmp.oerrors += txq->stats.oerrors;
}
#ifndef MLX5_PMD_SOFT_COUNTERS
/* FIXME: retrieve and add hardware counters. */
--
2.12.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* [dpdk-stable] [PATCH v5 3/4] net/mlx5: fix Tx stats error counter logic
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 1/4] " Shahaf Shuler
2017-09-14 10:50 ` [dpdk-stable] [PATCH v5 " Shahaf Shuler
2017-09-14 10:50 ` [dpdk-stable] [PATCH v5 2/4] net/mlx5: fix Tx stats error counter definition Shahaf Shuler
@ 2017-09-14 10:50 ` Shahaf Shuler
2017-09-14 10:50 ` [dpdk-stable] [PATCH v5 4/4] net/mlx5: enforce Tx num of segments limitation Shahaf Shuler
3 siblings, 0 replies; 29+ messages in thread
From: Shahaf Shuler @ 2017-09-14 10:50 UTC (permalink / raw)
To: nelio.laranjeiro, yskoh, adrien.mazarguil; +Cc: dev, stable
Tx error counter lacks the logic of incrementation, making it useless for
applications.
Fixes: 87011737b715 ("mlx5: add software counters")
Cc: stable@dpdk.org
Cc: adrien.mazarguil@6wind.com
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5_rxtx.c | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index fe9e7eac0..7567f2329 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -406,8 +406,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
#ifdef MLX5_PMD_SOFT_COUNTERS
total_length = length;
#endif
- if (length < (MLX5_WQE_DWORD_SIZE + 2))
+ if (length < (MLX5_WQE_DWORD_SIZE + 2)) {
+ txq->stats.oerrors++;
break;
+ }
/* Update element. */
(*txq->elts)[elts_head & elts_m] = buf;
/* Prefetch next buffer data. */
@@ -481,8 +483,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
cs_flags |= MLX5_ETH_WQE_L4_CSUM;
}
if (unlikely(tso_header_sz >
- MLX5_MAX_TSO_HEADER))
+ MLX5_MAX_TSO_HEADER)) {
+ txq->stats.oerrors++;
break;
+ }
copy_b = tso_header_sz - pkt_inline_sz;
/* First seg must contain all headers. */
assert(copy_b <= length);
@@ -843,8 +847,10 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (max_elts < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
break;
+ }
max_elts -= segs_n;
--pkts_n;
/* Should we enable HW CKSUM offload */
@@ -1064,8 +1070,10 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
if (max_elts < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
break;
+ }
max_elts -= segs_n;
--pkts_n;
/*
@@ -1353,8 +1361,10 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (max_elts - j < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
break;
+ }
/* Should we enable HW CKSUM offload. */
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
--
2.12.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* [dpdk-stable] [PATCH v5 4/4] net/mlx5: enforce Tx num of segments limitation
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 1/4] " Shahaf Shuler
` (2 preceding siblings ...)
2017-09-14 10:50 ` [dpdk-stable] [PATCH v5 3/4] net/mlx5: fix Tx stats error counter logic Shahaf Shuler
@ 2017-09-14 10:50 ` Shahaf Shuler
2017-09-14 19:21 ` Yongseok Koh
2017-09-15 8:11 ` Nélio Laranjeiro
3 siblings, 2 replies; 29+ messages in thread
From: Shahaf Shuler @ 2017-09-14 10:50 UTC (permalink / raw)
To: nelio.laranjeiro, yskoh, adrien.mazarguil; +Cc: dev, stable
Mellanox NICs has a limitation on the number of mbuf segments a multi
segment mbuf can have. The max number depends on the Tx offloads requested.
The current code not enforce such limitation, which might cause
malformed work requests to be written to the device.
This commit adds verification for the number of mbuf segments posted
to the device. In case of overflow the packet will not be sent.
In addition update the nic documentation with the limitation.
Considering device limitation is 63 data segments in a work request, the
maximum number of segment in mbuf was calculated taking TSO as the worst
case:
max_nb_segs = 63 - (control_segment + ethernet segment +
TSO headers inline + inline segment +
extra inline to align to cacheline)
Cc: stable@dpdk.org
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
---
on v5:
- fixed documentation.
- changed MLX5_MAX_DS macro to MLX5_DSEG_MAX.
---
doc/guides/nics/mlx5.rst | 4 ++++
drivers/net/mlx5/mlx5_defs.h | 3 ++-
drivers/net/mlx5/mlx5_prm.h | 3 +++
drivers/net/mlx5/mlx5_rxtx.c | 4 ++++
drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 5 +++++
drivers/net/mlx5/mlx5_txq.c | 24 ++++++++++++++++++++++++
6 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index f4cb18bca..c6a196c2e 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -124,6 +124,10 @@ Limitations
Will match any ipv4 packet (VLAN included).
+- A multi segment packet must have less than 6 segments in case the Tx burst function
+ is set to multi-packet send or Enhanced multi-packet send. Otherwise it must have
+ less than 50 segments.
+
Configuration
-------------
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index a76bc6f65..59ff00d6b 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -100,7 +100,8 @@
/*
* Maximum size of burst for vectorized Tx. This is related to the maximum size
- * of Enhaned MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * of Enhanced MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * Careful when changing, large value can cause WQE DS to overlap.
*/
#define MLX5_VPMD_TX_MAX_BURST 32U
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 608072f7e..c61e4d86c 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -154,6 +154,9 @@
/* Default mark value used when none is provided. */
#define MLX5_FLOW_MARK_DEFAULT 0xffffff
+/* Maximum number of DS in WQE. */
+#define MLX5_DSEG_MAX 63
+
/* Subset of struct mlx5_wqe_eth_seg. */
struct mlx5_wqe_eth_seg_small {
uint32_t rsvd0;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 7567f2329..5b6bd6ba6 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -661,6 +661,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
else
j += sg;
next_pkt:
+ if (ds > MLX5_DSEG_MAX) {
+ txq->stats.oerrors++;
+ break;
+ }
++elts_head;
++pkts;
++i;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index f89762ff8..39c732515 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -248,6 +248,10 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (segs_n == 1 ||
max_elts < segs_n || max_wqe < 2)
break;
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
+ break;
+ }
wqe = &((volatile struct mlx5_wqe64 *)
txq->wqes)[wqe_ci & wq_mask].hdr;
if (buf->ol_flags &
@@ -365,6 +369,7 @@ txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
max_elts = (elts_n - (elts_head - txq->elts_tail));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
+ assert(pkts_n <= MLX5_DSEG_MAX - nb_dword_in_hdr);
if (unlikely(!pkts_n))
return 0;
elts = &(*txq->elts)[elts_head & elts_m];
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 4b0b532b1..b4c5b10fb 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -288,6 +288,8 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
.comp_mask = IBV_EXP_QP_INIT_ATTR_PD,
};
if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+ unsigned int ds_cnt;
+
tmpl.txq.max_inline =
((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
RTE_CACHE_LINE_SIZE);
@@ -320,6 +322,28 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
attr.init.cap.max_inline_data =
tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
}
+ /*
+ * Check if the inline size is too large in a way which
+ * can make the WQE DS to overflow.
+ * Considering in calculation:
+ * WQE CTRL (1 DS)
+ * WQE ETH (1 DS)
+ * Inline part (N DS)
+ */
+ ds_cnt = 2 +
+ (attr.init.cap.max_inline_data / MLX5_WQE_DWORD_SIZE);
+ if (ds_cnt > MLX5_DSEG_MAX) {
+ unsigned int max_inline = (MLX5_DSEG_MAX - 2) *
+ MLX5_WQE_DWORD_SIZE;
+
+ max_inline = max_inline - (max_inline %
+ RTE_CACHE_LINE_SIZE);
+ WARN("txq inline is too large (%d) setting it to "
+ "the maximum possible: %d\n",
+ priv->txq_inline, max_inline);
+ tmpl.txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
+ attr.init.cap.max_inline_data = max_inline;
+ }
}
if (priv->tso) {
attr.init.max_tso_header =
--
2.12.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [dpdk-stable] [PATCH v5 4/4] net/mlx5: enforce Tx num of segments limitation
2017-09-14 10:50 ` [dpdk-stable] [PATCH v5 4/4] net/mlx5: enforce Tx num of segments limitation Shahaf Shuler
@ 2017-09-14 19:21 ` Yongseok Koh
2017-09-15 8:11 ` Nélio Laranjeiro
1 sibling, 0 replies; 29+ messages in thread
From: Yongseok Koh @ 2017-09-14 19:21 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: Nélio Laranjeiro, Adrien Mazarguil, dev, stable
> On Sep 14, 2017, at 3:50 AM, Shahaf Shuler <shahafs@mellanox.com> wrote:
>
> Mellanox NICs has a limitation on the number of mbuf segments a multi
> segment mbuf can have. The max number depends on the Tx offloads requested.
>
> The current code not enforce such limitation, which might cause
> malformed work requests to be written to the device.
>
> This commit adds verification for the number of mbuf segments posted
> to the device. In case of overflow the packet will not be sent.
>
> In addition update the nic documentation with the limitation.
> Considering device limitation is 63 data segments in a work request, the
> maximum number of segment in mbuf was calculated taking TSO as the worst
> case:
>
> max_nb_segs = 63 - (control_segment + ethernet segment +
> TSO headers inline + inline segment +
> extra inline to align to cacheline)
>
> Cc: stable@dpdk.org
>
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> ---
Acked-by: Yongseok Koh <yskoh@mellanox.com>
--
Thanks,
Yongseok
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [dpdk-stable] [PATCH v5 4/4] net/mlx5: enforce Tx num of segments limitation
2017-09-14 10:50 ` [dpdk-stable] [PATCH v5 4/4] net/mlx5: enforce Tx num of segments limitation Shahaf Shuler
2017-09-14 19:21 ` Yongseok Koh
@ 2017-09-15 8:11 ` Nélio Laranjeiro
1 sibling, 0 replies; 29+ messages in thread
From: Nélio Laranjeiro @ 2017-09-15 8:11 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: yskoh, adrien.mazarguil, dev, stable
On Thu, Sep 14, 2017 at 01:50:39PM +0300, Shahaf Shuler wrote:
> Mellanox NICs has a limitation on the number of mbuf segments a multi
> segment mbuf can have. The max number depends on the Tx offloads requested.
>
> The current code not enforce such limitation, which might cause
> malformed work requests to be written to the device.
>
> This commit adds verification for the number of mbuf segments posted
> to the device. In case of overflow the packet will not be sent.
>
> In addition update the nic documentation with the limitation.
> Considering device limitation is 63 data segments in a work request, the
> maximum number of segment in mbuf was calculated taking TSO as the worst
> case:
>
> max_nb_segs = 63 - (control_segment + ethernet segment +
> TSO headers inline + inline segment +
> extra inline to align to cacheline)
>
> Cc: stable@dpdk.org
>
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 29+ messages in thread
* [dpdk-stable] [PATCH v4 2/4] net/mlx5: fix Tx stats error counter definition
2017-09-11 12:50 ` [dpdk-stable] [PATCH v3 1/3] " Shahaf Shuler
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 1/4] " Shahaf Shuler
@ 2017-09-13 10:50 ` Shahaf Shuler
2017-09-13 17:59 ` Yongseok Koh
2017-09-14 8:11 ` Nélio Laranjeiro
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 3/4] net/mlx5: fix Tx stats error counter logic Shahaf Shuler
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 4/4] net/mlx5: enforce Tx num of segments limitation Shahaf Shuler
3 siblings, 2 replies; 29+ messages in thread
From: Shahaf Shuler @ 2017-09-13 10:50 UTC (permalink / raw)
To: nelio.laranjeiro, yskoh, adrien.mazarguil; +Cc: dev, stable
The current Tx error counter counts, according to its description,
the total number of packets not sent when TX ring full. It is reported
to application as part of oerrors field.
The drop due to full ring is not the statistic that should be set on
oerrors field. Such number can be counted by the application using the
return value of the Tx burst function.
The number that should be set there is the number of packets the device
could not transmit in any way, even when it has resources.
Therefore, replace this counter to count the total number of failed
transmitted packets.
Fixes: 87011737b715 ("mlx5: add software counters")
Cc: stable@dpdk.org
Cc: adrien.mazarguil@6wind.com
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
---
on v4:
- Split fix Tx stats commit into 2 seperate patches.
---
drivers/net/mlx5/mlx5_rxtx.h | 2 +-
drivers/net/mlx5/mlx5_stats.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 033e70f25..107ada0f5 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -75,7 +75,7 @@ struct mlx5_txq_stats {
uint64_t opackets; /**< Total of successfully sent packets. */
uint64_t obytes; /**< Total of successfully sent bytes. */
#endif
- uint64_t odropped; /**< Total of packets not sent when TX ring full. */
+ uint64_t oerrors; /**< Total number of failed transmitted packets. */
};
/* Flow director queue structure. */
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index d443e1336..06348c8a1 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -360,13 +360,13 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
tmp.q_opackets[idx] += txq->stats.opackets;
tmp.q_obytes[idx] += txq->stats.obytes;
#endif
- tmp.q_errors[idx] += txq->stats.odropped;
+ tmp.q_errors[idx] += txq->stats.oerrors;
}
#ifdef MLX5_PMD_SOFT_COUNTERS
tmp.opackets += txq->stats.opackets;
tmp.obytes += txq->stats.obytes;
#endif
- tmp.oerrors += txq->stats.odropped;
+ tmp.oerrors += txq->stats.oerrors;
}
#ifndef MLX5_PMD_SOFT_COUNTERS
/* FIXME: retrieve and add hardware counters. */
--
2.12.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [dpdk-stable] [PATCH v4 2/4] net/mlx5: fix Tx stats error counter definition
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 2/4] net/mlx5: fix Tx stats error counter definition Shahaf Shuler
@ 2017-09-13 17:59 ` Yongseok Koh
2017-09-14 8:11 ` Nélio Laranjeiro
1 sibling, 0 replies; 29+ messages in thread
From: Yongseok Koh @ 2017-09-13 17:59 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: Nélio Laranjeiro, Adrien Mazarguil, dev, stable
> On Sep 13, 2017, at 3:50 AM, Shahaf Shuler <shahafs@mellanox.com> wrote:
>
> The current Tx error counter counts, according to its description,
> the total number of packets not sent when TX ring full. It is reported
> to application as part of oerrors field.
>
> The drop due to full ring is not the statistic that should be set on
> oerrors field. Such number can be counted by the application using the
> return value of the Tx burst function.
> The number that should be set there is the number of packets the device
> could not transmit in any way, even when it has resources.
>
> Therefore, replace this counter to count the total number of failed
> transmitted packets.
>
> Fixes: 87011737b715 ("mlx5: add software counters")
> Cc: stable@dpdk.org
> Cc: adrien.mazarguil@6wind.com
>
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> ---
Acked-by: Yongseok Koh <yskoh@mellanox.com>
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [dpdk-stable] [PATCH v4 2/4] net/mlx5: fix Tx stats error counter definition
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 2/4] net/mlx5: fix Tx stats error counter definition Shahaf Shuler
2017-09-13 17:59 ` Yongseok Koh
@ 2017-09-14 8:11 ` Nélio Laranjeiro
1 sibling, 0 replies; 29+ messages in thread
From: Nélio Laranjeiro @ 2017-09-14 8:11 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: yskoh, adrien.mazarguil, dev, stable
On Wed, Sep 13, 2017 at 01:50:37PM +0300, Shahaf Shuler wrote:
> The current Tx error counter counts, according to its description,
> the total number of packets not sent when TX ring full. It is reported
> to application as part of oerrors field.
>
> The drop due to full ring is not the statistic that should be set on
> oerrors field. Such number can be counted by the application using the
> return value of the Tx burst function.
> The number that should be set there is the number of packets the device
> could not transmit in any way, even when it has resources.
>
> Therefore, replace this counter to count the total number of failed
> transmitted packets.
>
> Fixes: 87011737b715 ("mlx5: add software counters")
> Cc: stable@dpdk.org
> Cc: adrien.mazarguil@6wind.com
>
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 29+ messages in thread
* [dpdk-stable] [PATCH v4 3/4] net/mlx5: fix Tx stats error counter logic
2017-09-11 12:50 ` [dpdk-stable] [PATCH v3 1/3] " Shahaf Shuler
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 1/4] " Shahaf Shuler
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 2/4] net/mlx5: fix Tx stats error counter definition Shahaf Shuler
@ 2017-09-13 10:50 ` Shahaf Shuler
2017-09-13 18:17 ` Yongseok Koh
2017-09-14 8:12 ` Nélio Laranjeiro
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 4/4] net/mlx5: enforce Tx num of segments limitation Shahaf Shuler
3 siblings, 2 replies; 29+ messages in thread
From: Shahaf Shuler @ 2017-09-13 10:50 UTC (permalink / raw)
To: nelio.laranjeiro, yskoh, adrien.mazarguil; +Cc: dev, stable
Tx error counter lacks the logic of incrementation, making it useless for
applications.
Fixes: 87011737b715 ("mlx5: add software counters")
Cc: stable@dpdk.org
Cc: adrien.mazarguil@6wind.com
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
---
drivers/net/mlx5/mlx5_rxtx.c | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index fe9e7eac0..7567f2329 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -406,8 +406,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
#ifdef MLX5_PMD_SOFT_COUNTERS
total_length = length;
#endif
- if (length < (MLX5_WQE_DWORD_SIZE + 2))
+ if (length < (MLX5_WQE_DWORD_SIZE + 2)) {
+ txq->stats.oerrors++;
break;
+ }
/* Update element. */
(*txq->elts)[elts_head & elts_m] = buf;
/* Prefetch next buffer data. */
@@ -481,8 +483,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
cs_flags |= MLX5_ETH_WQE_L4_CSUM;
}
if (unlikely(tso_header_sz >
- MLX5_MAX_TSO_HEADER))
+ MLX5_MAX_TSO_HEADER)) {
+ txq->stats.oerrors++;
break;
+ }
copy_b = tso_header_sz - pkt_inline_sz;
/* First seg must contain all headers. */
assert(copy_b <= length);
@@ -843,8 +847,10 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (max_elts < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
break;
+ }
max_elts -= segs_n;
--pkts_n;
/* Should we enable HW CKSUM offload */
@@ -1064,8 +1070,10 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
if (max_elts < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
break;
+ }
max_elts -= segs_n;
--pkts_n;
/*
@@ -1353,8 +1361,10 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (max_elts - j < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
break;
+ }
/* Should we enable HW CKSUM offload. */
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
--
2.12.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [dpdk-stable] [PATCH v4 3/4] net/mlx5: fix Tx stats error counter logic
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 3/4] net/mlx5: fix Tx stats error counter logic Shahaf Shuler
@ 2017-09-13 18:17 ` Yongseok Koh
2017-09-14 8:12 ` Nélio Laranjeiro
1 sibling, 0 replies; 29+ messages in thread
From: Yongseok Koh @ 2017-09-13 18:17 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: Nélio Laranjeiro, Adrien Mazarguil, dev, stable
> On Sep 13, 2017, at 3:50 AM, Shahaf Shuler <shahafs@mellanox.com> wrote:
>
> Tx error counter lacks the logic of incrementation, making it useless for
> applications.
>
> Fixes: 87011737b715 ("mlx5: add software counters")
> Cc: stable@dpdk.org
> Cc: adrien.mazarguil@6wind.com
>
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> ---
Acked-by: Yongseok Koh <yskoh@mellanox.com>
Thanks,
Yongseok
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [dpdk-stable] [PATCH v4 3/4] net/mlx5: fix Tx stats error counter logic
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 3/4] net/mlx5: fix Tx stats error counter logic Shahaf Shuler
2017-09-13 18:17 ` Yongseok Koh
@ 2017-09-14 8:12 ` Nélio Laranjeiro
1 sibling, 0 replies; 29+ messages in thread
From: Nélio Laranjeiro @ 2017-09-14 8:12 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: yskoh, adrien.mazarguil, dev, stable
On Wed, Sep 13, 2017 at 01:50:38PM +0300, Shahaf Shuler wrote:
> Tx error counter lacks the logic of incrementation, making it useless for
> applications.
>
> Fixes: 87011737b715 ("mlx5: add software counters")
> Cc: stable@dpdk.org
> Cc: adrien.mazarguil@6wind.com
>
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 29+ messages in thread
* [dpdk-stable] [PATCH v4 4/4] net/mlx5: enforce Tx num of segments limitation
2017-09-11 12:50 ` [dpdk-stable] [PATCH v3 1/3] " Shahaf Shuler
` (2 preceding siblings ...)
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 3/4] net/mlx5: fix Tx stats error counter logic Shahaf Shuler
@ 2017-09-13 10:50 ` Shahaf Shuler
2017-09-13 19:51 ` Yongseok Koh
3 siblings, 1 reply; 29+ messages in thread
From: Shahaf Shuler @ 2017-09-13 10:50 UTC (permalink / raw)
To: nelio.laranjeiro, yskoh, adrien.mazarguil; +Cc: dev, stable
Mellanox NICs has a limitation on the number of mbuf segments a multi
segment mbuf can have. The max number depends on the Tx offloads requested.
The current code not enforce such limitation, which might cause
malformed work requests to be written to the device.
This commit adds verification for the number of mbuf segments posted
to the device. In case of overflow the packet will not be sent.
In addition update the nic documentation with the limitation.
Considering device limitation is 63 data segments in a work request, the
maximum number of segment in mbuf was calculated taking TSO as the worst
case:
max_nb_segs = 63 - (control_segment + ethernet segment +
TSO headers inline + inline segment +
extra inline to align to cacheline)
Cc: stable@dpdk.org
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
---
doc/guides/nics/mlx5.rst | 2 ++
drivers/net/mlx5/mlx5_defs.h | 3 ++-
drivers/net/mlx5/mlx5_prm.h | 3 +++
drivers/net/mlx5/mlx5_rxtx.c | 4 ++++
drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 5 +++++
drivers/net/mlx5/mlx5_txq.c | 27 +++++++++++++++++++++++++++
6 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index f4cb18bca..d8244de97 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -124,6 +124,8 @@ Limitations
Will match any ipv4 packet (VLAN included).
+- A multi segment mbuf must have less than 50 segments. That means mbuf->nb_segs < 50.
+
Configuration
-------------
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index a76bc6f65..3de0e5d81 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -100,7 +100,8 @@
/*
* Maximum size of burst for vectorized Tx. This is related to the maximum size
- * of Enhaned MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * of Enhanced MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * Careful when changing, large value can cause wqe DS to overlap.
*/
#define MLX5_VPMD_TX_MAX_BURST 32U
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 608072f7e..bc2b72333 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -154,6 +154,9 @@
/* Default mark value used when none is provided. */
#define MLX5_FLOW_MARK_DEFAULT 0xffffff
+/* Maximum number of DS in WQE. */
+#define MLX5_MAX_DS 63
+
/* Subset of struct mlx5_wqe_eth_seg. */
struct mlx5_wqe_eth_seg_small {
uint32_t rsvd0;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 7567f2329..fdd7067da 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -661,6 +661,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
else
j += sg;
next_pkt:
+ if (ds > MLX5_MAX_DS) {
+ txq->stats.oerrors++;
+ break;
+ }
++elts_head;
++pkts;
++i;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index f89762ff8..3583e6780 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -248,6 +248,10 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (segs_n == 1 ||
max_elts < segs_n || max_wqe < 2)
break;
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
+ break;
+ }
wqe = &((volatile struct mlx5_wqe64 *)
txq->wqes)[wqe_ci & wq_mask].hdr;
if (buf->ol_flags &
@@ -365,6 +369,7 @@ txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
max_elts = (elts_n - (elts_head - txq->elts_tail));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
+ assert(pkts_n <= MLX5_MAX_DS - nb_dword_in_hdr);
if (unlikely(!pkts_n))
return 0;
elts = &(*txq->elts)[elts_head & elts_m];
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 4b0b532b1..091b1a93d 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -288,6 +288,8 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
.comp_mask = IBV_EXP_QP_INIT_ATTR_PD,
};
if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+ unsigned int ds_cnt;
+
tmpl.txq.max_inline =
((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
RTE_CACHE_LINE_SIZE);
@@ -320,6 +322,31 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
attr.init.cap.max_inline_data =
tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
}
+ /*
+ * Check if the inline size is too large in a way which
+ * can make the wqe DS to overflow.
+ * Considering in calculation:
+ * WQE CTRL (1 DS)
+ * WQE ETH (1 DS)
+ * inline part (N DS)
+ */
+ ds_cnt = 2 +
+ (attr.init.cap.max_inline_data / MLX5_WQE_DWORD_SIZE);
+ if (ds_cnt > MLX5_MAX_DS) {
+ unsigned int max_inline = (MLX5_MAX_DS - 2) *
+ MLX5_WQE_DWORD_SIZE;
+
+ /* Ceil down*/
+ max_inline = max_inline - (max_inline %
+ RTE_CACHE_LINE_SIZE);
+ WARN("txq inline is too large (%d) setting it to "
+ "the maximum possible: %d\n",
+ priv->txq_inline, max_inline);
+ tmpl.txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
+ attr.init.cap.max_inline_data = max_inline;
+ if (priv->mps == MLX5_MPW_ENHANCED)
+ tmpl.txq.inline_max_packet_sz = max_inline;
+ }
}
if (priv->tso) {
attr.init.max_tso_header =
--
2.12.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [dpdk-stable] [PATCH v4 4/4] net/mlx5: enforce Tx num of segments limitation
2017-09-13 10:50 ` [dpdk-stable] [PATCH v4 4/4] net/mlx5: enforce Tx num of segments limitation Shahaf Shuler
@ 2017-09-13 19:51 ` Yongseok Koh
2017-09-14 5:23 ` Shahaf Shuler
0 siblings, 1 reply; 29+ messages in thread
From: Yongseok Koh @ 2017-09-13 19:51 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: nelio.laranjeiro, adrien.mazarguil, dev, stable
On Wed, Sep 13, 2017 at 01:50:39PM +0300, Shahaf Shuler wrote:
> Mellanox NICs has a limitation on the number of mbuf segments a multi
> segment mbuf can have. The max number depends on the Tx offloads requested.
>
> The current code not enforce such limitation, which might cause
> malformed work requests to be written to the device.
>
> This commit adds verification for the number of mbuf segments posted
> to the device. In case of overflow the packet will not be sent.
>
> In addition update the nic documentation with the limitation.
> Considering device limitation is 63 data segments in a work request, the
> maximum number of segment in mbuf was calculated taking TSO as the worst
> case:
>
> max_nb_segs = 63 - (control_segment + ethernet segment +
> TSO headers inline + inline segment +
> extra inline to align to cacheline)
>
> Cc: stable@dpdk.org
>
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> ---
> doc/guides/nics/mlx5.rst | 2 ++
> drivers/net/mlx5/mlx5_defs.h | 3 ++-
> drivers/net/mlx5/mlx5_prm.h | 3 +++
> drivers/net/mlx5/mlx5_rxtx.c | 4 ++++
> drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 5 +++++
> drivers/net/mlx5/mlx5_txq.c | 27 +++++++++++++++++++++++++++
> 6 files changed, 43 insertions(+), 1 deletion(-)
>
> diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
> index f4cb18bca..d8244de97 100644
> --- a/doc/guides/nics/mlx5.rst
> +++ b/doc/guides/nics/mlx5.rst
> @@ -124,6 +124,8 @@ Limitations
>
> Will match any ipv4 packet (VLAN included).
>
> +- A multi segment mbuf must have less than 50 segments. That means mbuf->nb_segs < 50.
Isn't it better to use either "multiple segment packet" or "multi-segment
packet"? Also, more information might be needed here. If MPW/eMPW is enabled,
the code restricts the max number of segments up to MLX5_MPW_DSEG_MAX(5).
> +
> Configuration
> -------------
>
> diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
> index a76bc6f65..3de0e5d81 100644
> --- a/drivers/net/mlx5/mlx5_defs.h
> +++ b/drivers/net/mlx5/mlx5_defs.h
> @@ -100,7 +100,8 @@
>
> /*
> * Maximum size of burst for vectorized Tx. This is related to the maximum size
> - * of Enhaned MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
> + * of Enhanced MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
> + * Careful when changing, large value can cause wqe DS to overlap.
wqe -> WQE.
> */
> #define MLX5_VPMD_TX_MAX_BURST 32U
>
> diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
> index 608072f7e..bc2b72333 100644
> --- a/drivers/net/mlx5/mlx5_prm.h
> +++ b/drivers/net/mlx5/mlx5_prm.h
> @@ -154,6 +154,9 @@
> /* Default mark value used when none is provided. */
> #define MLX5_FLOW_MARK_DEFAULT 0xffffff
>
> +/* Maximum number of DS in WQE. */
> +#define MLX5_MAX_DS 63
How about make it consistent with MLX5_MPW_DSEG_MAX by naming MLX5_DSEG_MAX?
> +
> /* Subset of struct mlx5_wqe_eth_seg. */
> struct mlx5_wqe_eth_seg_small {
> uint32_t rsvd0;
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index 7567f2329..fdd7067da 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -661,6 +661,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
> else
> j += sg;
> next_pkt:
> + if (ds > MLX5_MAX_DS) {
> + txq->stats.oerrors++;
> + break;
> + }
> ++elts_head;
> ++pkts;
> ++i;
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
> index f89762ff8..3583e6780 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
> @@ -248,6 +248,10 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
> if (segs_n == 1 ||
> max_elts < segs_n || max_wqe < 2)
> break;
> + if (segs_n > MLX5_MPW_DSEG_MAX) {
> + txq->stats.oerrors++;
> + break;
> + }
> wqe = &((volatile struct mlx5_wqe64 *)
> txq->wqes)[wqe_ci & wq_mask].hdr;
> if (buf->ol_flags &
> @@ -365,6 +369,7 @@ txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
> max_elts = (elts_n - (elts_head - txq->elts_tail));
> max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
> pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
> + assert(pkts_n <= MLX5_MAX_DS - nb_dword_in_hdr);
> if (unlikely(!pkts_n))
> return 0;
> elts = &(*txq->elts)[elts_head & elts_m];
> diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
> index 4b0b532b1..091b1a93d 100644
> --- a/drivers/net/mlx5/mlx5_txq.c
> +++ b/drivers/net/mlx5/mlx5_txq.c
> @@ -288,6 +288,8 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
> .comp_mask = IBV_EXP_QP_INIT_ATTR_PD,
> };
> if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
> + unsigned int ds_cnt;
> +
> tmpl.txq.max_inline =
> ((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
> RTE_CACHE_LINE_SIZE);
> @@ -320,6 +322,31 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
> attr.init.cap.max_inline_data =
> tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
> }
> + /*
> + * Check if the inline size is too large in a way which
> + * can make the wqe DS to overflow.
wqe -> WQE.
> + * Considering in calculation:
> + * WQE CTRL (1 DS)
> + * WQE ETH (1 DS)
> + * inline part (N DS)
inline -> Inline ?
> + */
> + ds_cnt = 2 +
> + (attr.init.cap.max_inline_data / MLX5_WQE_DWORD_SIZE);
> + if (ds_cnt > MLX5_MAX_DS) {
> + unsigned int max_inline = (MLX5_MAX_DS - 2) *
> + MLX5_WQE_DWORD_SIZE;
> +
> + /* Ceil down*/
Missing space and period. Rather, this comment could be unnecessary as the
following code is so obvious. Or, you might want to explain why you make it
aligned.
> + max_inline = max_inline - (max_inline %
> + RTE_CACHE_LINE_SIZE);
> + WARN("txq inline is too large (%d) setting it to "
> + "the maximum possible: %d\n",
> + priv->txq_inline, max_inline);
> + tmpl.txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
> + attr.init.cap.max_inline_data = max_inline;
> + if (priv->mps == MLX5_MPW_ENHANCED)
> + tmpl.txq.inline_max_packet_sz = max_inline;
No need to set inline_max_packet_sz. inline_max_packet_sz is to limit the max
size of a packet which can be inlined in eMPW mode. As long as txq->max_inline
is correctly set, txq->inline_max_packet_sz doesn't affect the total number of
DSEGs in a WQE.
Thanks,
Yongseok
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [dpdk-stable] [PATCH v4 4/4] net/mlx5: enforce Tx num of segments limitation
2017-09-13 19:51 ` Yongseok Koh
@ 2017-09-14 5:23 ` Shahaf Shuler
2017-09-14 8:05 ` Yongseok Koh
0 siblings, 1 reply; 29+ messages in thread
From: Shahaf Shuler @ 2017-09-14 5:23 UTC (permalink / raw)
To: Yongseok Koh; +Cc: Nélio Laranjeiro, Adrien Mazarguil, dev, stable
Hi Yongseok,
Wednesday, September 13, 2017 10:52 PM, Yongseok Koh:
> >
> > +/* Maximum number of DS in WQE. */
> > +#define MLX5_MAX_DS 63
> How about make it consistent with MLX5_MPW_DSEG_MAX by naming
> MLX5_DSEG_MAX?
>
It doesn't have the same meaning.
The MLX5_MPW_DSEG_MAX is to limit the number of mbuf segments (buf->nb_segs) for multi packet wqe. The inline part is taken into account differently.
The MLX_MAX_DS is to limit the number data segments (i.e. MLX5_WQE_DWORD_SIZE) that could be set into a WQE. This includes everything (inline, ctrl seg, eth seg, pointers).
For the regular Tx burst there are many options for different inline sizes which impact on the max number of mbuf segments possible.
BTW - am still not sure why we have the MLX5_MPW_DSEG_MAX limitation.
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [dpdk-stable] [PATCH v4 4/4] net/mlx5: enforce Tx num of segments limitation
2017-09-14 5:23 ` Shahaf Shuler
@ 2017-09-14 8:05 ` Yongseok Koh
0 siblings, 0 replies; 29+ messages in thread
From: Yongseok Koh @ 2017-09-14 8:05 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: Nélio Laranjeiro, Adrien Mazarguil, dev, stable
Thanks!
Yongseok
> On Sep 13, 2017, at 10:23 PM, Shahaf Shuler <shahafs@mellanox.com> wrote:
>
> Hi Yongseok,
>
> Wednesday, September 13, 2017 10:52 PM, Yongseok Koh:
>>>
>>> +/* Maximum number of DS in WQE. */
>>> +#define MLX5_MAX_DS 63
>> How about make it consistent with MLX5_MPW_DSEG_MAX by naming
>> MLX5_DSEG_MAX?
>
> It doesn't have the same meaning.
>
> The MLX5_MPW_DSEG_MAX is to limit the number of mbuf segments (buf->nb_segs) for multi packet wqe. The inline part is taken into account differently.
> The MLX_MAX_DS is to limit the number data segments (i.e. MLX5_WQE_DWORD_SIZE) that could be set into a WQE. This includes everything (inline, ctrl seg, eth seg, pointers).
> For the regular Tx burst there are many options for different inline sizes which impact on the max number of mbuf segments possible.
I should've been clearer. I just suggested a small change from DS to DSEG as I thought DS and DSEG are same. But if it just comes from the field name in WQE Ctrl segment, MLX5_WQE_DS_MAX or MLX5_WQE_CTRL_DS_MAX could be good. I hoped it could be a little more explanatory. I'm not good at naming, will defer to you. :-)
> BTW - am still not sure why we have the MLX5_MPW_DSEG_MAX limitation.
I don't know either. We should discuss it with chip design.
Thanks
Yongseok
^ permalink raw reply [flat|nested] 29+ messages in thread
* [dpdk-stable] [PATCH v3 2/3] net/mlx5: fix Tx stats error counter
2017-08-30 7:07 ` [dpdk-stable] [PATCH v2 " Shahaf Shuler
2017-09-04 14:57 ` Nélio Laranjeiro
2017-09-11 12:50 ` [dpdk-stable] [PATCH v3 1/3] " Shahaf Shuler
@ 2017-09-11 12:50 ` Shahaf Shuler
2017-09-11 12:50 ` [dpdk-stable] [PATCH v3 3/3] net/mlx5: enforce Tx num of segments limitation Shahaf Shuler
3 siblings, 0 replies; 29+ messages in thread
From: Shahaf Shuler @ 2017-09-11 12:50 UTC (permalink / raw)
To: nelio.laranjeiro; +Cc: dev, stable, adrien.mazarguil
The current Tx error counter counts, according to its description,
the total number of packets not sent when TX ring full. It is reported to
application as part of oerrors field.
Putting aside the fact there is no logic which increment this counter,
the behaviour of the PMD is wrong.
The drop due to full ring is not the statistic that should be set on
oerrors field. Such number can be counted by the application using the
return value of the Tx burst function.
The number that should be set there is the number of packets the device
could not transmit in any way, even when it has resources.
Therefore, replace this counter to count the total number of failed
transmitted packets.
In addition, add the logic to increment it on the different cases.
Fixes: 87011737b715 ("mlx5: add software counters")
Cc: stable@dpdk.org
Cc: adrien.mazarguil@6wind.com
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
---
on v3:
- This patch is new on v3 to address the comment provided on v2.
---
drivers/net/mlx5/mlx5_rxtx.c | 20 +++++++++++++++-----
drivers/net/mlx5/mlx5_rxtx.h | 2 +-
drivers/net/mlx5/mlx5_stats.c | 4 ++--
3 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index fe9e7eac0..7567f2329 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -406,8 +406,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
#ifdef MLX5_PMD_SOFT_COUNTERS
total_length = length;
#endif
- if (length < (MLX5_WQE_DWORD_SIZE + 2))
+ if (length < (MLX5_WQE_DWORD_SIZE + 2)) {
+ txq->stats.oerrors++;
break;
+ }
/* Update element. */
(*txq->elts)[elts_head & elts_m] = buf;
/* Prefetch next buffer data. */
@@ -481,8 +483,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
cs_flags |= MLX5_ETH_WQE_L4_CSUM;
}
if (unlikely(tso_header_sz >
- MLX5_MAX_TSO_HEADER))
+ MLX5_MAX_TSO_HEADER)) {
+ txq->stats.oerrors++;
break;
+ }
copy_b = tso_header_sz - pkt_inline_sz;
/* First seg must contain all headers. */
assert(copy_b <= length);
@@ -843,8 +847,10 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (max_elts < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
break;
+ }
max_elts -= segs_n;
--pkts_n;
/* Should we enable HW CKSUM offload */
@@ -1064,8 +1070,10 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
if (max_elts < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
break;
+ }
max_elts -= segs_n;
--pkts_n;
/*
@@ -1353,8 +1361,10 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (max_elts - j < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
break;
+ }
/* Should we enable HW CKSUM offload. */
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 033e70f25..107ada0f5 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -75,7 +75,7 @@ struct mlx5_txq_stats {
uint64_t opackets; /**< Total of successfully sent packets. */
uint64_t obytes; /**< Total of successfully sent bytes. */
#endif
- uint64_t odropped; /**< Total of packets not sent when TX ring full. */
+ uint64_t oerrors; /**< Total number of failed transmitted packets. */
};
/* Flow director queue structure. */
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index d443e1336..06348c8a1 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -360,13 +360,13 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
tmp.q_opackets[idx] += txq->stats.opackets;
tmp.q_obytes[idx] += txq->stats.obytes;
#endif
- tmp.q_errors[idx] += txq->stats.odropped;
+ tmp.q_errors[idx] += txq->stats.oerrors;
}
#ifdef MLX5_PMD_SOFT_COUNTERS
tmp.opackets += txq->stats.opackets;
tmp.obytes += txq->stats.obytes;
#endif
- tmp.oerrors += txq->stats.odropped;
+ tmp.oerrors += txq->stats.oerrors;
}
#ifndef MLX5_PMD_SOFT_COUNTERS
/* FIXME: retrieve and add hardware counters. */
--
2.12.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* [dpdk-stable] [PATCH v3 3/3] net/mlx5: enforce Tx num of segments limitation
2017-08-30 7:07 ` [dpdk-stable] [PATCH v2 " Shahaf Shuler
` (2 preceding siblings ...)
2017-09-11 12:50 ` [dpdk-stable] [PATCH v3 2/3] net/mlx5: fix Tx stats error counter Shahaf Shuler
@ 2017-09-11 12:50 ` Shahaf Shuler
3 siblings, 0 replies; 29+ messages in thread
From: Shahaf Shuler @ 2017-09-11 12:50 UTC (permalink / raw)
To: nelio.laranjeiro; +Cc: dev, stable
Mellanox NICs has a limitation on the number of mbuf segments a multi
segment mbuf can have. The max number depends on the Tx offloads requested.
The current code not enforce such limitation, which might cause
malformed work requests to be written to the device.
This commit adds verification for the number of mbuf segments posted
to the device. In case of overflow the packet will not be sent.
In addition update the nic documentation with the limitation.
Considering device limitation is 63 data segments in a work request, the
maximum number of segment in mbuf was calculated taking TSO as the worst
case:
max_nb_segs = 63 - (control_segment + ethernet segment +
TSO headers inline + inline segment +
extra inline to align to cacheline)
Cc: stable@dpdk.org
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
---
on v3:
- remove debug prints from datapath.
on v2:
- remove parenthesis around MLX5_MAX_DS.
- add limitation to nic guide.
- update commit message.
- fix typo.
---
doc/guides/nics/mlx5.rst | 2 ++
drivers/net/mlx5/mlx5_defs.h | 3 ++-
drivers/net/mlx5/mlx5_prm.h | 3 +++
drivers/net/mlx5/mlx5_rxtx.c | 4 ++++
drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 5 +++++
drivers/net/mlx5/mlx5_txq.c | 27 +++++++++++++++++++++++++++
6 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index f4cb18bca..d8244de97 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -124,6 +124,8 @@ Limitations
Will match any ipv4 packet (VLAN included).
+- A multi segment mbuf must have less than 50 segments. That means mbuf->nb_segs < 50.
+
Configuration
-------------
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index a76bc6f65..3de0e5d81 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -100,7 +100,8 @@
/*
* Maximum size of burst for vectorized Tx. This is related to the maximum size
- * of Enhaned MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * of Enhanced MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * Careful when changing, large value can cause wqe DS to overlap.
*/
#define MLX5_VPMD_TX_MAX_BURST 32U
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 608072f7e..bc2b72333 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -154,6 +154,9 @@
/* Default mark value used when none is provided. */
#define MLX5_FLOW_MARK_DEFAULT 0xffffff
+/* Maximum number of DS in WQE. */
+#define MLX5_MAX_DS 63
+
/* Subset of struct mlx5_wqe_eth_seg. */
struct mlx5_wqe_eth_seg_small {
uint32_t rsvd0;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 7567f2329..fdd7067da 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -661,6 +661,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
else
j += sg;
next_pkt:
+ if (ds > MLX5_MAX_DS) {
+ txq->stats.oerrors++;
+ break;
+ }
++elts_head;
++pkts;
++i;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index f89762ff8..3583e6780 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -248,6 +248,10 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (segs_n == 1 ||
max_elts < segs_n || max_wqe < 2)
break;
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
+ break;
+ }
wqe = &((volatile struct mlx5_wqe64 *)
txq->wqes)[wqe_ci & wq_mask].hdr;
if (buf->ol_flags &
@@ -365,6 +369,7 @@ txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
max_elts = (elts_n - (elts_head - txq->elts_tail));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
+ assert(pkts_n <= MLX5_MAX_DS - nb_dword_in_hdr);
if (unlikely(!pkts_n))
return 0;
elts = &(*txq->elts)[elts_head & elts_m];
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 4b0b532b1..091b1a93d 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -288,6 +288,8 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
.comp_mask = IBV_EXP_QP_INIT_ATTR_PD,
};
if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+ unsigned int ds_cnt;
+
tmpl.txq.max_inline =
((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
RTE_CACHE_LINE_SIZE);
@@ -320,6 +322,31 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
attr.init.cap.max_inline_data =
tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
}
+ /*
+ * Check if the inline size is too large in a way which
+ * can make the wqe DS to overflow.
+ * Considering in calculation:
+ * WQE CTRL (1 DS)
+ * WQE ETH (1 DS)
+ * inline part (N DS)
+ */
+ ds_cnt = 2 +
+ (attr.init.cap.max_inline_data / MLX5_WQE_DWORD_SIZE);
+ if (ds_cnt > MLX5_MAX_DS) {
+ unsigned int max_inline = (MLX5_MAX_DS - 2) *
+ MLX5_WQE_DWORD_SIZE;
+
+ /* Ceil down*/
+ max_inline = max_inline - (max_inline %
+ RTE_CACHE_LINE_SIZE);
+ WARN("txq inline is too large (%d) setting it to "
+ "the maximum possible: %d\n",
+ priv->txq_inline, max_inline);
+ tmpl.txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
+ attr.init.cap.max_inline_data = max_inline;
+ if (priv->mps == MLX5_MPW_ENHANCED)
+ tmpl.txq.inline_max_packet_sz = max_inline;
+ }
}
if (priv->tso) {
attr.init.max_tso_header =
--
2.12.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* [dpdk-stable] [PATCH v2 2/2] net/mlx5: enforce Tx num of segments limitation
2017-08-23 7:33 [dpdk-stable] [PATCH 1/2] net/mlx5: fix num seg assumption on vPMD Shahaf Shuler
` (2 preceding siblings ...)
2017-08-30 7:07 ` [dpdk-stable] [PATCH v2 " Shahaf Shuler
@ 2017-08-30 7:07 ` Shahaf Shuler
2017-09-04 14:57 ` Nélio Laranjeiro
3 siblings, 1 reply; 29+ messages in thread
From: Shahaf Shuler @ 2017-08-30 7:07 UTC (permalink / raw)
To: nelio.laranjeiro, adrien.mazarguil; +Cc: dev, stable
Mellanox NICs has a limitation on the number of mbuf segments a multi
segment mbuf can have. The max number depends on the Tx offloads requested.
The current code not enforce such limitation, which might cause
malformed work requests to be written to the device.
This commit adds verification for the number of mbuf segments posted
to the device. In case of overflow the packet will not be sent.
Debug prints were added to help application identify the cause for such
case.
In addition update the nic documentation with the limitation.
Considering device limitation is 63 data segments in a work request, the
maximum number of segment in mbuf was calculated taking TSO as the worst
case:
max_nb_segs = 63 - (control_segment + ethernet segment +
TSO headers inline + inline segment +
extra inline to align to cacheline)
Cc: stable@dpdk.org
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
---
This patch should be applied only after the series:
http://dpdk.org/dev/patchwork/patch/27367/
on v2:
- remove parenthesis around MLX5_MAX_DS.
- add limitation to nic guide.
- update commit message.
- fix typo.
---
doc/guides/nics/mlx5.rst | 2 ++
drivers/net/mlx5/mlx5_defs.h | 3 ++-
drivers/net/mlx5/mlx5_prm.h | 3 +++
drivers/net/mlx5/mlx5_rxtx.c | 30 +++++++++++++++++++++++++++---
drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 8 ++++++++
drivers/net/mlx5/mlx5_txq.c | 27 +++++++++++++++++++++++++++
6 files changed, 69 insertions(+), 4 deletions(-)
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index f4cb18bca..d8244de97 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -124,6 +124,8 @@ Limitations
Will match any ipv4 packet (VLAN included).
+- A multi segment mbuf must have less than 50 segments. That means mbuf->nb_segs < 50.
+
Configuration
-------------
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index a76bc6f65..3de0e5d81 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -100,7 +100,8 @@
/*
* Maximum size of burst for vectorized Tx. This is related to the maximum size
- * of Enhaned MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * of Enhanced MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * Careful when changing, large value can cause wqe DS to overlap.
*/
#define MLX5_VPMD_TX_MAX_BURST 32U
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 608072f7e..bc2b72333 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -154,6 +154,9 @@
/* Default mark value used when none is provided. */
#define MLX5_FLOW_MARK_DEFAULT 0xffffff
+/* Maximum number of DS in WQE. */
+#define MLX5_MAX_DS 63
+
/* Subset of struct mlx5_wqe_eth_seg. */
struct mlx5_wqe_eth_seg_small {
uint32_t rsvd0;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index fe9e7eac0..d7aa382f9 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -657,6 +657,15 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
else
j += sg;
next_pkt:
+ if (ds > MLX5_MAX_DS) {
+#ifndef NDEBUG
+ WARN("Cannot send packet %p with %d segments "
+ "wqe.ds = %d, wqe.inline_sz = %d",
+ (void *)pkts, (*pkts)->nb_segs, ds,
+ pkt_inline_sz);
+#endif
+ break;
+ }
++elts_head;
++pkts;
++i;
@@ -843,8 +852,13 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (max_elts < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+#ifndef NDEBUG
+ WARN("Cannot send packet %p with %d segments",
+ (void *)buf, segs_n);
+#endif
break;
+ }
max_elts -= segs_n;
--pkts_n;
/* Should we enable HW CKSUM offload */
@@ -1064,8 +1078,13 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
if (max_elts < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+#ifndef NDEBUG
+ WARN("Cannot send packet %p with %d segments",
+ (void *)buf, segs_n);
+#endif
break;
+ }
max_elts -= segs_n;
--pkts_n;
/*
@@ -1353,8 +1372,13 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (max_elts - j < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+#ifndef NDEBUG
+ WARN("Cannot send packet %p with %d segments",
+ (void *)buf, segs_n);
+#endif
break;
+ }
/* Should we enable HW CKSUM offload. */
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index f89762ff8..9d6f6df32 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -248,6 +248,13 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (segs_n == 1 ||
max_elts < segs_n || max_wqe < 2)
break;
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+#ifndef NDEBUG
+ WARN("Cannot send packet %p with %d segments",
+ (void *)buf, segs_n);
+#endif
+ break;
+ }
wqe = &((volatile struct mlx5_wqe64 *)
txq->wqes)[wqe_ci & wq_mask].hdr;
if (buf->ol_flags &
@@ -365,6 +372,7 @@ txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
max_elts = (elts_n - (elts_head - txq->elts_tail));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
+ assert(pkts_n <= MLX5_MAX_DS - nb_dword_in_hdr);
if (unlikely(!pkts_n))
return 0;
elts = &(*txq->elts)[elts_head & elts_m];
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 4b0b532b1..091b1a93d 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -288,6 +288,8 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
.comp_mask = IBV_EXP_QP_INIT_ATTR_PD,
};
if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+ unsigned int ds_cnt;
+
tmpl.txq.max_inline =
((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
RTE_CACHE_LINE_SIZE);
@@ -320,6 +322,31 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
attr.init.cap.max_inline_data =
tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
}
+ /*
+ * Check if the inline size is too large in a way which
+ * can make the wqe DS to overflow.
+ * Considering in calculation:
+ * WQE CTRL (1 DS)
+ * WQE ETH (1 DS)
+ * inline part (N DS)
+ */
+ ds_cnt = 2 +
+ (attr.init.cap.max_inline_data / MLX5_WQE_DWORD_SIZE);
+ if (ds_cnt > MLX5_MAX_DS) {
+ unsigned int max_inline = (MLX5_MAX_DS - 2) *
+ MLX5_WQE_DWORD_SIZE;
+
+ /* Ceil down*/
+ max_inline = max_inline - (max_inline %
+ RTE_CACHE_LINE_SIZE);
+ WARN("txq inline is too large (%d) setting it to "
+ "the maximum possible: %d\n",
+ priv->txq_inline, max_inline);
+ tmpl.txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
+ attr.init.cap.max_inline_data = max_inline;
+ if (priv->mps == MLX5_MPW_ENHANCED)
+ tmpl.txq.inline_max_packet_sz = max_inline;
+ }
}
if (priv->tso) {
attr.init.max_tso_header =
--
2.12.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [dpdk-stable] [PATCH v2 2/2] net/mlx5: enforce Tx num of segments limitation
2017-08-30 7:07 ` [dpdk-stable] [PATCH v2 2/2] " Shahaf Shuler
@ 2017-09-04 14:57 ` Nélio Laranjeiro
0 siblings, 0 replies; 29+ messages in thread
From: Nélio Laranjeiro @ 2017-09-04 14:57 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: adrien.mazarguil, dev, stable
On Wed, Aug 30, 2017 at 10:07:08AM +0300, Shahaf Shuler wrote:
> Mellanox NICs has a limitation on the number of mbuf segments a multi
> segment mbuf can have. The max number depends on the Tx offloads requested.
>
> The current code not enforce such limitation, which might cause
> malformed work requests to be written to the device.
>
> This commit adds verification for the number of mbuf segments posted
> to the device. In case of overflow the packet will not be sent.
> Debug prints were added to help application identify the cause for such
> case.
>
> In addition update the nic documentation with the limitation.
> Considering device limitation is 63 data segments in a work request, the
> maximum number of segment in mbuf was calculated taking TSO as the worst
> case:
>
> max_nb_segs = 63 - (control_segment + ethernet segment +
> TSO headers inline + inline segment +
> extra inline to align to cacheline)
>
> Cc: stable@dpdk.org
>
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> ---
> This patch should be applied only after the series:
> http://dpdk.org/dev/patchwork/patch/27367/
>
> on v2:
> - remove parenthesis around MLX5_MAX_DS.
> - add limitation to nic guide.
> - update commit message.
> - fix typo.
> ---
> doc/guides/nics/mlx5.rst | 2 ++
> drivers/net/mlx5/mlx5_defs.h | 3 ++-
> drivers/net/mlx5/mlx5_prm.h | 3 +++
> drivers/net/mlx5/mlx5_rxtx.c | 30 +++++++++++++++++++++++++++---
> drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 8 ++++++++
> drivers/net/mlx5/mlx5_txq.c | 27 +++++++++++++++++++++++++++
> 6 files changed, 69 insertions(+), 4 deletions(-)
>
> diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
> index f4cb18bca..d8244de97 100644
> --- a/doc/guides/nics/mlx5.rst
> +++ b/doc/guides/nics/mlx5.rst
> @@ -124,6 +124,8 @@ Limitations
>
> Will match any ipv4 packet (VLAN included).
>
> +- A multi segment mbuf must have less than 50 segments. That means mbuf->nb_segs < 50.
> +
> Configuration
> -------------
>
> diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
> index a76bc6f65..3de0e5d81 100644
> --- a/drivers/net/mlx5/mlx5_defs.h
> +++ b/drivers/net/mlx5/mlx5_defs.h
> @@ -100,7 +100,8 @@
>
> /*
> * Maximum size of burst for vectorized Tx. This is related to the maximum size
> - * of Enhaned MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
> + * of Enhanced MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
> + * Careful when changing, large value can cause wqe DS to overlap.
> */
> #define MLX5_VPMD_TX_MAX_BURST 32U
>
> diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
> index 608072f7e..bc2b72333 100644
> --- a/drivers/net/mlx5/mlx5_prm.h
> +++ b/drivers/net/mlx5/mlx5_prm.h
> @@ -154,6 +154,9 @@
> /* Default mark value used when none is provided. */
> #define MLX5_FLOW_MARK_DEFAULT 0xffffff
>
> +/* Maximum number of DS in WQE. */
> +#define MLX5_MAX_DS 63
> +
> /* Subset of struct mlx5_wqe_eth_seg. */
> struct mlx5_wqe_eth_seg_small {
> uint32_t rsvd0;
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index fe9e7eac0..d7aa382f9 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -657,6 +657,15 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
> else
> j += sg;
> next_pkt:
> + if (ds > MLX5_MAX_DS) {
> +#ifndef NDEBUG
> + WARN("Cannot send packet %p with %d segments "
> + "wqe.ds = %d, wqe.inline_sz = %d",
> + (void *)pkts, (*pkts)->nb_segs, ds,
> + pkt_inline_sz);
> +#endif
I will say no for those series of warning in the datapath, I understand the
need of such informations, but it will reduce considerably the throughput in
some cases where the issue is not related to this.
Datapath must also be optimised also in debug mode.
I suggestion would be to change the burst API to return an int with correct
errors values.
Currently oerrors can be increased in such situation to provide more
information to the user.
Thanks,
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 29+ messages in thread