* [dpdk-dev] [PATCH] net/mlx5: fix the legacy multi packet write session
@ 2020-02-09 22:54 Viacheslav Ovsiienko
2020-02-10 15:08 ` Matan Azrad
2020-02-11 8:33 ` Raslan Darawsheh
0 siblings, 2 replies; 3+ messages in thread
From: Viacheslav Ovsiienko @ 2020-02-09 22:54 UTC (permalink / raw)
To: dev; +Cc: matan, rasland, orika, stable
To provide better PCIe bandwidth utilization, the ConnectX-4LX
NIC supports multi-packet write (MPW) sessions, which allow
packing multiple packets into one descriptor (WQE). This is a legacy
feature and it has some limitations on the packets and data
description segments. To provide the best performance, all inline
packets must be put into a shared data segment and the total length
of the MPW session must be limited. The limit is controlled with the
txq_inline_mpw devarg.
Fixes: 82e75f8323bf ("net/mlx5: fix legacy multi-packet Tx descriptors")
Cc: stable@dpdk.org
Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
drivers/common/mlx5/mlx5_prm.h | 2 +-
drivers/net/mlx5/mlx5_rxtx.c | 90 ++++++++++++++++++++++++++++++++++++------
2 files changed, 80 insertions(+), 12 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 855b37a..4ac3d4b 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -100,7 +100,7 @@
*/
#define MLX5_EMPW_MAX_PACKETS MLX5_TX_COMP_THRESH
#define MLX5_MPW_MAX_PACKETS 6
-#define MLX5_MPW_INLINE_MAX_PACKETS 2
+#define MLX5_MPW_INLINE_MAX_PACKETS 6
/*
* Default packet length threshold to be inlined with
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 5eea932..86e0fab 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -2949,8 +2949,14 @@ enum mlx5_txcmp_code {
unsigned int part;
uint8_t *pdst;
- dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE);
- pdst = &dseg->inline_data[0];
+ if (!MLX5_TXOFF_CONFIG(MPW)) {
+ /* Store the descriptor byte counter for eMPW sessions. */
+ dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE);
+ pdst = &dseg->inline_data[0];
+ } else {
+ /* The entire legacy MPW session counter is stored on close. */
+ pdst = (uint8_t *)dseg;
+ }
/*
* The WQEBB space availability is checked by caller.
* Here we should be aware of WQE ring buffer wraparound only.
@@ -2962,7 +2968,8 @@ enum mlx5_txcmp_code {
len -= part;
if (likely(!len)) {
pdst += part;
- pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
+ if (!MLX5_TXOFF_CONFIG(MPW))
+ pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
/* Note: no final wraparound check here. */
return (struct mlx5_wqe_dseg *)pdst;
}
@@ -3010,9 +3017,16 @@ enum mlx5_txcmp_code {
static_assert(MLX5_DSEG_MIN_INLINE_SIZE ==
(2 * RTE_ETHER_ADDR_LEN),
"invalid Data Segment data size");
- dseg->bcount = rte_cpu_to_be_32((len + sizeof(struct rte_vlan_hdr)) |
- MLX5_ETH_WQE_DATA_INLINE);
- pdst = &dseg->inline_data[0];
+ if (!MLX5_TXOFF_CONFIG(MPW)) {
+ /* Store the descriptor byte counter for eMPW sessions. */
+ dseg->bcount = rte_cpu_to_be_32
+ ((len + sizeof(struct rte_vlan_hdr)) |
+ MLX5_ETH_WQE_DATA_INLINE);
+ pdst = &dseg->inline_data[0];
+ } else {
+ /* The entire legacy MPW session counter is stored on close. */
+ pdst = (uint8_t *)dseg;
+ }
memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE);
buf += MLX5_DSEG_MIN_INLINE_SIZE;
pdst += MLX5_DSEG_MIN_INLINE_SIZE;
@@ -3035,7 +3049,8 @@ enum mlx5_txcmp_code {
len -= part;
if (likely(!len)) {
pdst += part;
- pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
+ if (!MLX5_TXOFF_CONFIG(MPW))
+ pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
/* Note: no final wraparound check here. */
return (struct mlx5_wqe_dseg *)pdst;
}
@@ -3921,15 +3936,33 @@ enum mlx5_txcmp_code {
unsigned int slen,
unsigned int olx __rte_unused)
{
+ struct mlx5_wqe_dseg *dseg = &loc->wqe_last->dseg[0];
+
MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE));
- MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0);
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Update sent data bytes counter. */
txq->stats.obytes += slen;
#else
(void)slen;
#endif
- len = len / MLX5_WSEG_SIZE + 2;
+ if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) {
+ /*
+ * If the legacy MPW session contains the inline packets
+ * we should set the only inline data segment length
+ * and align the total length to the segment size.
+ */
+ MLX5_ASSERT(len > sizeof(dseg->bcount));
+ dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) |
+ MLX5_ETH_WQE_DATA_INLINE);
+ len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2;
+ } else {
+ /*
+ * The session is not legacy MPW or contains the
+ * data buffer pointer segments.
+ */
+ MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0);
+ len = len / MLX5_WSEG_SIZE + 2;
+ }
loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len);
txq->wqe_ci += (len + 3) / 4;
loc->wqe_free -= (len + 3) / 4;
@@ -4208,6 +4241,15 @@ enum mlx5_txcmp_code {
loc->wqe_free) * MLX5_WQE_SIZE -
MLX5_WQE_CSEG_SIZE -
MLX5_WQE_ESEG_SIZE;
+ /* Limit the room for legacy MPW sessions for performance. */
+ if (MLX5_TXOFF_CONFIG(MPW))
+ room = RTE_MIN(room,
+ RTE_MAX(txq->inlen_empw +
+ sizeof(dseg->bcount) +
+ (MLX5_TXOFF_CONFIG(VLAN) ?
+ sizeof(struct rte_vlan_hdr) : 0),
+ MLX5_MPW_INLINE_MAX_PACKETS *
+ MLX5_WQE_DSEG_SIZE));
/* Build WQE till we have space, packets and resources. */
part = room;
for (;;) {
@@ -4238,8 +4280,26 @@ enum mlx5_txcmp_code {
if (dlen > txq->inlen_empw ||
loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE)
goto pointer_empw;
+ if (MLX5_TXOFF_CONFIG(MPW)) {
+ tlen = dlen;
+ if (part == room) {
+ /* Open new inline MPW session. */
+ tlen += sizeof(dseg->bcount);
+ dseg->bcount = RTE_BE32(0);
+ dseg = RTE_PTR_ADD
+ (dseg, sizeof(dseg->bcount));
+ } else {
+ /*
+ * No pointer and inline descriptor
+ * intermix for legacy MPW sessions.
+ */
+ if (loc->wqe_last->dseg[0].bcount)
+ break;
+ }
+ } else {
+ tlen = sizeof(dseg->bcount) + dlen;
+ }
/* Inline entire packet, optional VLAN insertion. */
- tlen = sizeof(dseg->bcount) + dlen;
if (MLX5_TXOFF_CONFIG(VLAN) &&
loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) {
/*
@@ -4265,7 +4325,8 @@ enum mlx5_txcmp_code {
dseg = mlx5_tx_dseg_empw(txq, loc, dseg,
dptr, dlen, olx);
}
- tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE);
+ if (!MLX5_TXOFF_CONFIG(MPW))
+ tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE);
MLX5_ASSERT(room >= tlen);
room -= tlen;
/*
@@ -4276,6 +4337,13 @@ enum mlx5_txcmp_code {
goto next_mbuf;
pointer_empw:
/*
+ * No pointer and inline descriptor
+ * intermix for legacy MPW sessions.
+ */
+ if (MLX5_TXOFF_CONFIG(MPW) &&
+ loc->wqe_last->dseg[0].bcount == RTE_BE32(0))
+ break;
+ /*
* Not inlinable VLAN packets are
* proceeded outside of this routine.
*/
--
1.8.3.1
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [dpdk-dev] [PATCH] net/mlx5: fix the legacy multi packet write session
2020-02-09 22:54 [dpdk-dev] [PATCH] net/mlx5: fix the legacy multi packet write session Viacheslav Ovsiienko
@ 2020-02-10 15:08 ` Matan Azrad
2020-02-11 8:33 ` Raslan Darawsheh
1 sibling, 0 replies; 3+ messages in thread
From: Matan Azrad @ 2020-02-10 15:08 UTC (permalink / raw)
To: Slava Ovsiienko, dev; +Cc: Raslan Darawsheh, Ori Kam, stable
From: Viacheslav Ovsiienko
> To provide the better PCIe bandwidth utilization the ConnectX-4LX NIC
> supports the multi-packet write (MPW) sessions allowing to pack multiple
> packets into one descriptor (WQE). This is legacy feature and it has some
> limitations on the packets and data description segments. To provide the
> best performance all inline packets must be put into shared data segment
> and the total length of MPW session must be limited. The limit is controlled
> with txq_inline_mpw devarg.
>
> Fixes: 82e75f8323bf ("net/mlx5: fix legacy multi-packet Tx descriptors")
> Cc: stable@dpdk.org
>
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [dpdk-dev] [PATCH] net/mlx5: fix the legacy multi packet write session
2020-02-09 22:54 [dpdk-dev] [PATCH] net/mlx5: fix the legacy multi packet write session Viacheslav Ovsiienko
2020-02-10 15:08 ` Matan Azrad
@ 2020-02-11 8:33 ` Raslan Darawsheh
1 sibling, 0 replies; 3+ messages in thread
From: Raslan Darawsheh @ 2020-02-11 8:33 UTC (permalink / raw)
To: Slava Ovsiienko, dev; +Cc: Matan Azrad, Ori Kam, stable
Hi
> -----Original Message-----
> From: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> Sent: Monday, February 10, 2020 12:55 AM
> To: dev@dpdk.org
> Cc: Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
> <rasland@mellanox.com>; Ori Kam <orika@mellanox.com>;
> stable@dpdk.org
> Subject: [PATCH] net/mlx5: fix the legacy multi packet write session
>
> To provide the better PCIe bandwidth utilization the ConnectX-4LX
> NIC supports the multi-packet write (MPW) sessions allowing to
> pack multiple packets into one descriptor (WQE). This is legacy
> feature and it has some limitations on the packets and data
> description segments. To provide the best performance all inline
> packets must be put into shared data segment and the total length
> of MPW session must be limited. The limit is controlled with
> txq_inline_mpw devarg.
>
> Fixes: 82e75f8323bf ("net/mlx5: fix legacy multi-packet Tx descriptors")
> Cc: stable@dpdk.org
>
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> ---
> drivers/common/mlx5/mlx5_prm.h | 2 +-
> drivers/net/mlx5/mlx5_rxtx.c | 90
> ++++++++++++++++++++++++++++++++++++------
> 2 files changed, 80 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/common/mlx5/mlx5_prm.h
> b/drivers/common/mlx5/mlx5_prm.h
> index 855b37a..4ac3d4b 100644
> --- a/drivers/common/mlx5/mlx5_prm.h
> +++ b/drivers/common/mlx5/mlx5_prm.h
> @@ -100,7 +100,7 @@
> */
Patch applied to next-net-mlx,
Kindest regards,
Raslan Darawsheh
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2020-02-11 8:33 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-09 22:54 [dpdk-dev] [PATCH] net/mlx5: fix the legacy multi packet write session Viacheslav Ovsiienko
2020-02-10 15:08 ` Matan Azrad
2020-02-11 8:33 ` Raslan Darawsheh
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).