DPDK patches and discussions
* [dpdk-dev] [PATCH 1/5] net/mlx5: enable inlining data from multiple segments
@ 2017-12-27  3:55 Yongseok Koh
  2017-12-27  3:55 ` [dpdk-dev] [PATCH 2/5] net/mlx5: consolidate condition checks for TSO Yongseok Koh
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Yongseok Koh @ 2017-12-27  3:55 UTC (permalink / raw)
  To: adrien.mazarguil, nelio.laranjeiro; +Cc: dev, Yongseok Koh

mlx5_tx_burst() doesn't inline data from the 2nd segment. If there's still
enough room in the descriptor after inlining the 1st segment, further
inlining from the 2nd segment would be beneficial to save PCIe bandwidth.
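
Not part of the patch -- for illustration only, a minimal standalone sketch
of the idea, using a made-up struct seg and inline_segments() helper rather
than the real rte_mbuf and WQE handling: keep copying into the descriptor's
inline room across segment boundaries until the room runs out.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct seg {                            /* stand-in for one rte_mbuf segment */
        const uint8_t *data;
        size_t len;
        const struct seg *next;
};

/* Copy packet data into 'room' bytes of inline descriptor space, continuing
 * into the following segments instead of stopping after the first one. */
static size_t
inline_segments(uint8_t *desc, size_t room, const struct seg *s)
{
        size_t copied = 0;

        while (s != NULL && room > 0) {
                size_t n = s->len < room ? s->len : room;

                memcpy(desc + copied, s->data, n);
                copied += n;
                room -= n;
                if (n < s->len)
                        break;          /* no room left mid-segment */
                s = s->next;            /* keep inlining from the next segment */
        }
        return copied;
}

int
main(void)
{
        const uint8_t hdr[8] = "headers", pay[16] = "payload-payload";
        const struct seg s1 = { pay, sizeof(pay), NULL };
        const struct seg s0 = { hdr, sizeof(hdr), &s1 };
        uint8_t desc[12];

        /* 12 bytes of room: all of segment 0 (8 B) plus 4 B of segment 1. */
        printf("inlined %zu bytes\n", inline_segments(desc, sizeof(desc), &s0));
        return 0;
}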

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 44 +++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 28c0ad8ab..1e0f5dc52 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -390,7 +390,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		if (max_elts < segs_n)
 			break;
 		max_elts -= segs_n;
-		--segs_n;
+		sg = --segs_n;
 		if (unlikely(--max_wqe == 0))
 			break;
 		wqe = (volatile struct mlx5_wqe_v *)
@@ -516,7 +516,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		}
 		/* Inline if enough room. */
 		if (max_inline || tso) {
-			uint32_t inl;
+			uint32_t inl = 0;
 			uintptr_t end = (uintptr_t)
 				(((uintptr_t)txq->wqes) +
 				 (1 << txq->wqe_n) * MLX5_WQE_SIZE);
@@ -524,12 +524,14 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 						   RTE_CACHE_LINE_SIZE -
 						   (pkt_inline_sz - 2) -
 						   !!tso * sizeof(inl);
-			uintptr_t addr_end = (addr + inline_room) &
-					     ~(RTE_CACHE_LINE_SIZE - 1);
-			unsigned int copy_b = (addr_end > addr) ?
-				RTE_MIN((addr_end - addr), length) :
-				0;
-
+			uintptr_t addr_end;
+			unsigned int copy_b;
+
+pkt_inline:
+			addr_end = RTE_ALIGN_FLOOR(addr + inline_room,
+						   RTE_CACHE_LINE_SIZE);
+			copy_b = (addr_end > addr) ?
+				 RTE_MIN((addr_end - addr), length) : 0;
 			if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
 				/*
 				 * One Dseg remains in the current WQE.  To
@@ -541,7 +543,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 				if (unlikely(max_wqe < n))
 					break;
 				max_wqe -= n;
-				if (tso) {
+				if (tso && !inl) {
 					inl = rte_cpu_to_be_32(copy_b |
 							       MLX5_INLINE_SEG);
 					rte_memcpy((void *)raw,
@@ -576,11 +578,18 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			} else if (!segs_n) {
 				goto next_pkt;
 			} else {
-				/* dseg will be advance as part of next_seg */
-				dseg = (volatile rte_v128u32_t *)
-					((uintptr_t)wqe +
-					 ((ds - 1) * MLX5_WQE_DWORD_SIZE));
-				goto next_seg;
+				raw += copy_b;
+				inline_room -= copy_b;
+				--segs_n;
+				buf = buf->next;
+				assert(buf);
+				addr = rte_pktmbuf_mtod(buf, uintptr_t);
+				length = DATA_LEN(buf);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+				total_length += length;
+#endif
+				(*txq->elts)[++elts_head & elts_m] = buf;
+				goto pkt_inline;
 			}
 		} else {
 			/*
@@ -639,12 +648,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			addr >> 32,
 		};
 		(*txq->elts)[++elts_head & elts_m] = buf;
-		++sg;
-		/* Advance counter only if all segs are successfully posted. */
-		if (sg < segs_n)
+		if (--segs_n)
 			goto next_seg;
-		else
-			j += sg;
 next_pkt:
 		if (ds > MLX5_DSEG_MAX) {
 			txq->stats.oerrors++;
@@ -653,6 +658,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		++elts_head;
 		++pkts;
 		++i;
+		j += sg;
 		/* Initialize known and common part of the WQE structure. */
 		if (tso) {
 			wqe->ctrl = (rte_v128u32_t){
-- 
2.11.0


* [dpdk-dev] [PATCH 2/5] net/mlx5: consolidate condition checks for TSO
  2017-12-27  3:55 [dpdk-dev] [PATCH 1/5] net/mlx5: enable inlining data from multiple segments Yongseok Koh
@ 2017-12-27  3:55 ` Yongseok Koh
  2017-12-27  3:55 ` [dpdk-dev] [PATCH 3/5] net/mlx5: add missing sanity checks for Tx completion queue Yongseok Koh
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Yongseok Koh @ 2017-12-27  3:55 UTC (permalink / raw)
  To: adrien.mazarguil, nelio.laranjeiro; +Cc: dev, Yongseok Koh

This change helps the compiler better optimize the code.
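
Not part of the patch -- a tiny standalone sketch of the consolidation, with
made-up struct and flag names standing in for the real rte_mbuf fields:
folding the nested checks into one flat predicate removes a nesting level
and gives the compiler a single condition to evaluate.

#include <stdint.h>
#include <stdio.h>

#define TX_TCP_SEG (UINT64_C(1) << 50)  /* stand-in for PKT_TX_TCP_SEG */

struct txq { int tso_en; };             /* stand-in for mlx5_txq_data */
struct pkt { uint64_t ol_flags; };      /* stand-in for rte_mbuf */

/* Before: if (txq->tso_en) { tso = ol_flags & TX_TCP_SEG; if (tso) ... }
 * After: one flat predicate, evaluated once. */
static int
is_tso(const struct txq *txq, const struct pkt *buf)
{
        return txq->tso_en && (buf->ol_flags & TX_TCP_SEG);
}

int
main(void)
{
        struct txq q = { .tso_en = 1 };
        struct pkt p = { .ol_flags = TX_TCP_SEG };

        printf("tso=%d\n", is_tso(&q, &p));
        return 0;
}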

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 126 ++++++++++++++++++++-----------------------
 1 file changed, 58 insertions(+), 68 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 1e0f5dc52..5e58f1afd 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -441,77 +441,67 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			addr += pkt_inline_sz;
 		}
 		raw += MLX5_WQE_DWORD_SIZE;
-		if (txq->tso_en) {
-			tso = buf->ol_flags & PKT_TX_TCP_SEG;
-			if (tso) {
-				uintptr_t end = (uintptr_t)
-						(((uintptr_t)txq->wqes) +
-						(1 << txq->wqe_n) *
-						MLX5_WQE_SIZE);
-				unsigned int copy_b;
-				uint8_t vlan_sz = (buf->ol_flags &
-						  PKT_TX_VLAN_PKT) ? 4 : 0;
-				const uint64_t is_tunneled =
-							buf->ol_flags &
-							(PKT_TX_TUNNEL_GRE |
-							 PKT_TX_TUNNEL_VXLAN);
-
-				tso_header_sz = buf->l2_len + vlan_sz +
-						buf->l3_len + buf->l4_len;
-				tso_segsz = buf->tso_segsz;
-				if (unlikely(tso_segsz == 0)) {
-					txq->stats.oerrors++;
-					break;
-				}
-				if (is_tunneled	&& txq->tunnel_en) {
-					tso_header_sz += buf->outer_l2_len +
-							 buf->outer_l3_len;
-					cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
-				} else {
-					cs_flags |= MLX5_ETH_WQE_L4_CSUM;
-				}
-				if (unlikely(tso_header_sz >
-					     MLX5_MAX_TSO_HEADER)) {
-					txq->stats.oerrors++;
+		tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
+		if (tso) {
+			uintptr_t end =
+				(uintptr_t)(((uintptr_t)txq->wqes) +
+					    (1 << txq->wqe_n) * MLX5_WQE_SIZE);
+			unsigned int copy_b;
+			uint8_t vlan_sz =
+				(buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0;
+			const uint64_t is_tunneled =
+				buf->ol_flags & (PKT_TX_TUNNEL_GRE |
+						 PKT_TX_TUNNEL_VXLAN);
+
+			tso_header_sz = buf->l2_len + vlan_sz +
+					buf->l3_len + buf->l4_len;
+			tso_segsz = buf->tso_segsz;
+			if (unlikely(tso_segsz == 0)) {
+				txq->stats.oerrors++;
+				break;
+			}
+			if (is_tunneled	&& txq->tunnel_en) {
+				tso_header_sz += buf->outer_l2_len +
+						 buf->outer_l3_len;
+				cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
+			} else {
+				cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+			}
+			if (unlikely(tso_header_sz > MLX5_MAX_TSO_HEADER)) {
+				txq->stats.oerrors++;
+				break;
+			}
+			copy_b = tso_header_sz - pkt_inline_sz;
+			/* First seg must contain all headers. */
+			assert(copy_b <= length);
+			if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
+				uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
+
+				if (unlikely(max_wqe < n))
 					break;
-				}
-				copy_b = tso_header_sz - pkt_inline_sz;
-				/* First seg must contain all headers. */
-				assert(copy_b <= length);
-				if (copy_b &&
-				   ((end - (uintptr_t)raw) > copy_b)) {
-					uint16_t n = (MLX5_WQE_DS(copy_b) -
-						      1 + 3) / 4;
-
-					if (unlikely(max_wqe < n))
-						break;
-					max_wqe -= n;
-					rte_memcpy((void *)raw,
-						   (void *)addr, copy_b);
-					addr += copy_b;
-					length -= copy_b;
-					/* Include padding for TSO header. */
-					copy_b = MLX5_WQE_DS(copy_b) *
-						 MLX5_WQE_DWORD_SIZE;
-					pkt_inline_sz += copy_b;
-					raw += copy_b;
-				} else {
-					/* NOP WQE. */
-					wqe->ctrl = (rte_v128u32_t){
-						     rte_cpu_to_be_32(
-							txq->wqe_ci << 8),
-						     rte_cpu_to_be_32(
-							txq->qp_num_8s | 1),
-						     0,
-						     0,
-					};
-					ds = 1;
+				max_wqe -= n;
+				rte_memcpy((void *)raw, (void *)addr, copy_b);
+				addr += copy_b;
+				length -= copy_b;
+				/* Include padding for TSO header. */
+				copy_b = MLX5_WQE_DS(copy_b) *
+					 MLX5_WQE_DWORD_SIZE;
+				pkt_inline_sz += copy_b;
+				raw += copy_b;
+			} else {
+				/* NOP WQE. */
+				wqe->ctrl = (rte_v128u32_t){
+					rte_cpu_to_be_32(txq->wqe_ci << 8),
+					rte_cpu_to_be_32(txq->qp_num_8s | 1),
+					0,
+					0,
+				};
+				ds = 1;
 #ifdef MLX5_PMD_SOFT_COUNTERS
-					total_length = 0;
+				total_length = 0;
 #endif
-					k++;
-					goto next_wqe;
-				}
+				k++;
+				goto next_wqe;
 			}
 		}
 		/* Inline if enough room. */
-- 
2.11.0


* [dpdk-dev] [PATCH 3/5] net/mlx5: add missing sanity checks for Tx completion queue
  2017-12-27  3:55 [dpdk-dev] [PATCH 1/5] net/mlx5: enable inlining data from multiple segments Yongseok Koh
  2017-12-27  3:55 ` [dpdk-dev] [PATCH 2/5] net/mlx5: consolidate condition checks for TSO Yongseok Koh
@ 2017-12-27  3:55 ` Yongseok Koh
  2017-12-27  3:55 ` [dpdk-dev] [PATCH 4/5] net/mlx5: add fallback in Tx for multi-segment packet Yongseok Koh
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Yongseok Koh @ 2017-12-27  3:55 UTC (permalink / raw)
  To: adrien.mazarguil, nelio.laranjeiro; +Cc: dev, Yongseok Koh

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c          | 19 ++++++++++++++++++-
 drivers/net/mlx5/mlx5_rxtx.h          |  2 ++
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h |  8 ++++++++
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h  |  8 ++++++++
 drivers/net/mlx5/mlx5_txq.c           |  2 ++
 5 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 5e58f1afd..7c9d18270 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -357,6 +357,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* Start processing. */
 	mlx5_tx_complete(txq);
 	max_elts = (elts_n - (elts_head - txq->elts_tail));
+	/* A CQE slot must always be available. */
+	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
 	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
 	if (unlikely(!max_wqe))
 		return 0;
@@ -700,6 +702,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		/* Save elts_head in unused "immediate" field of WQE. */
 		last_wqe->ctrl3 = txq->elts_head;
 		txq->elts_comp = 0;
+#ifndef NDEBUG
+		++txq->cq_pi;
+#endif
 	} else {
 		txq->elts_comp = comp;
 	}
@@ -818,6 +823,8 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* Start processing. */
 	mlx5_tx_complete(txq);
 	max_elts = (elts_n - (elts_head - txq->elts_tail));
+	/* A CQE slot must always be available. */
+	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
 	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
 	if (unlikely(!max_wqe))
 		return 0;
@@ -911,6 +918,9 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		/* Save elts_head in unused "immediate" field of WQE. */
 		wqe->ctrl[3] = elts_head;
 		txq->elts_comp = 0;
+#ifndef NDEBUG
+		++txq->cq_pi;
+#endif
 	} else {
 		txq->elts_comp = comp;
 	}
@@ -1042,6 +1052,8 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 	/* Start processing. */
 	mlx5_tx_complete(txq);
 	max_elts = (elts_n - (elts_head - txq->elts_tail));
+	/* A CQE slot must always be available. */
+	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
 	do {
 		struct rte_mbuf *buf = *(pkts++);
 		uintptr_t addr;
@@ -1203,6 +1215,9 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 		/* Save elts_head in unused "immediate" field of WQE. */
 		wqe->ctrl[3] = elts_head;
 		txq->elts_comp = 0;
+#ifndef NDEBUG
+		++txq->cq_pi;
+#endif
 	} else {
 		txq->elts_comp = comp;
 	}
@@ -1549,7 +1564,9 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		wqe->ctrl[3] = elts_head;
 		txq->elts_comp = 0;
 		txq->mpw_comp = txq->wqe_ci;
-		txq->cq_pi++;
+#ifndef NDEBUG
+		++txq->cq_pi;
+#endif
 	} else {
 		txq->elts_comp += j;
 	}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index b783ca203..c072169f2 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -185,7 +185,9 @@ struct mlx5_txq_data {
 	uint16_t elts_comp; /* Counter since last completion request. */
 	uint16_t mpw_comp; /* WQ index since last completion request. */
 	uint16_t cq_ci; /* Consumer index for completion queue. */
+#ifndef NDEBUG
 	uint16_t cq_pi; /* Producer index for completion queue. */
+#endif
 	uint16_t wqe_ci; /* Consumer index for work queue. */
 	uint16_t wqe_pi; /* Producer index for work queue. */
 	uint16_t elts_n:4; /* (*elts)[] length (in log2). */
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index 77ce0c3e0..661fbf11c 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -135,6 +135,8 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
 
 	assert(elts_n > pkts_n);
 	mlx5_tx_complete(txq);
+	/* A CQE slot must always be available. */
+	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
 	if (unlikely(!pkts_n))
 		return 0;
 	for (n = 0; n < pkts_n; ++n) {
@@ -205,7 +207,9 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
 		wqe->ctrl[2] = rte_cpu_to_be_32(8);
 		wqe->ctrl[3] = txq->elts_head;
 		txq->elts_comp = 0;
+#ifndef NDEBUG
 		++txq->cq_pi;
+#endif
 	}
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	txq->stats.opackets += n;
@@ -269,6 +273,8 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	assert(elts_n > pkts_n);
 	mlx5_tx_complete(txq);
 	max_elts = (elts_n - (elts_head - txq->elts_tail));
+	/* A CQE slot must always be available. */
+	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
 	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
 	pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
 	if (unlikely(!pkts_n))
@@ -306,7 +312,9 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	} else {
 		/* Request a completion. */
 		txq->elts_comp = 0;
+#ifndef NDEBUG
 		++txq->cq_pi;
+#endif
 		comp_req = 8;
 	}
 	/* Fill CTRL in the header. */
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index f25681184..2b0e62861 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -135,6 +135,8 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
 
 	assert(elts_n > pkts_n);
 	mlx5_tx_complete(txq);
+	/* A CQE slot must always be available. */
+	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
 	if (unlikely(!pkts_n))
 		return 0;
 	for (n = 0; n < pkts_n; ++n) {
@@ -206,7 +208,9 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
 		wqe->ctrl[2] = rte_cpu_to_be_32(8);
 		wqe->ctrl[3] = txq->elts_head;
 		txq->elts_comp = 0;
+#ifndef NDEBUG
 		++txq->cq_pi;
+#endif
 	}
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	txq->stats.opackets += n;
@@ -268,6 +272,8 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	assert(elts_n > pkts_n);
 	mlx5_tx_complete(txq);
 	max_elts = (elts_n - (elts_head - txq->elts_tail));
+	/* A CQE slot must always be available. */
+	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
 	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
 	pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
 	assert(pkts_n <= MLX5_DSEG_MAX - nb_dword_in_hdr);
@@ -307,7 +313,9 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	} else {
 		/* Request a completion. */
 		txq->elts_comp = 0;
+#ifndef NDEBUG
 		++txq->cq_pi;
+#endif
 		comp_req = 8;
 	}
 	/* Fill CTRL in the header. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index a786a6b63..89b16fda2 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -418,7 +418,9 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
 		(volatile struct mlx5_cqe (*)[])
 		(uintptr_t)cq_info.buf;
 	txq_data->cq_ci = 0;
+#ifndef NDEBUG
 	txq_data->cq_pi = 0;
+#endif
 	txq_data->wqe_ci = 0;
 	txq_data->wqe_pi = 0;
 	txq_ibv->qp = tmpl.qp;
-- 
2.11.0


* [dpdk-dev] [PATCH 4/5] net/mlx5: add fallback in Tx for multi-segment packet
  2017-12-27  3:55 [dpdk-dev] [PATCH 1/5] net/mlx5: enable inlining data from multiple segments Yongseok Koh
  2017-12-27  3:55 ` [dpdk-dev] [PATCH 2/5] net/mlx5: consolidate condition checks for TSO Yongseok Koh
  2017-12-27  3:55 ` [dpdk-dev] [PATCH 3/5] net/mlx5: add missing sanity checks for Tx completion queue Yongseok Koh
@ 2017-12-27  3:55 ` Yongseok Koh
  2017-12-27  3:55 ` [dpdk-dev] [PATCH 5/5] net/mlx5: clean up multi-segment packet processing Yongseok Koh
  2017-12-27  6:06 ` [dpdk-dev] [PATCH 1/5] net/mlx5: enable inlining data from multiple segments Shahaf Shuler
  4 siblings, 0 replies; 6+ messages in thread
From: Yongseok Koh @ 2017-12-27  3:55 UTC (permalink / raw)
  To: adrien.mazarguil, nelio.laranjeiro; +Cc: dev, Yongseok Koh

mlx5_tx_burst_empw() falls back to the legacy Tx descriptor for
multi-segmented packets without taking advantage of inlining. In many
cases, the 1st segment can be inlined, letting the device fetch only one
segment instead of two. This saves PCIe bandwidth when transmitting
multi-segmented packets while still using Enhanced Multi-Packet Send for
other packets.
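
Not part of the patch -- a self-contained sketch of the dispatch idea, where
struct pkt, count_contig(), burst_legacy() and burst_empw() are made-up
stand-ins for the real rte_mbuf and PMD burst routines: contiguous runs of
multi-segment packets go through the legacy path, while single-segment runs
keep using Enhanced MPW.

#include <stdint.h>
#include <stdio.h>

struct pkt { int nb_segs; };            /* stand-in for rte_mbuf */

/* Length of the leading run of multi-segment (multi=1) or single-segment
 * (multi=0) packets. */
static uint16_t
count_contig(struct pkt **pkts, uint16_t n, int multi)
{
        uint16_t i;

        for (i = 0; i < n; ++i)
                if ((pkts[i]->nb_segs > 1) != multi)
                        break;
        return i;
}

/* Stubs standing in for the legacy and Enhanced MPW burst routines. */
static uint16_t burst_legacy(struct pkt **p, uint16_t n) { (void)p; return n; }
static uint16_t burst_empw(struct pkt **p, uint16_t n) { (void)p; return n; }

/* Split the burst into runs and send each run through the path that can
 * handle it, mirroring the structure of the new mlx5_tx_burst_empw(). */
static uint16_t
tx_burst(struct pkt **pkts, uint16_t pkts_n)
{
        uint16_t nb_tx = 0;

        while (pkts_n > nb_tx) {
                uint16_t n, ret;

                n = count_contig(&pkts[nb_tx], pkts_n - nb_tx, 1);
                if (n) {
                        ret = burst_legacy(&pkts[nb_tx], n);
                        if (!ret)
                                break;
                        nb_tx += ret;
                }
                n = count_contig(&pkts[nb_tx], pkts_n - nb_tx, 0);
                if (n) {
                        ret = burst_empw(&pkts[nb_tx], n);
                        if (!ret)
                                break;
                        nb_tx += ret;
                }
        }
        return nb_tx;
}

int
main(void)
{
        struct pkt a = { 2 }, b = { 1 }, c = { 1 };
        struct pkt *pkts[] = { &a, &b, &c };

        printf("sent %u packets\n", (unsigned)tx_burst(pkts, 3));
        return 0;
}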

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c          | 53 +++++++++++++++++++++++++++++++----
 drivers/net/mlx5/mlx5_rxtx.h          | 50 +++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_rxtx_vec.c      | 27 +-----------------
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h |  2 +-
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h  |  2 +-
 5 files changed, 100 insertions(+), 34 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 7c9d18270..d5e32b845 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1304,10 +1304,10 @@ mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
 }
 
 /**
- * DPDK callback for TX with Enhanced MPW support.
+ * TX with Enhanced MPW support.
  *
- * @param dpdk_txq
- *   Generic pointer to TX queue structure.
+ * @param txq
+ *   Pointer to TX queue structure.
  * @param[in] pkts
  *   Packets to transmit.
  * @param pkts_n
@@ -1316,10 +1316,10 @@ mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
  * @return
  *   Number of packets successfully transmitted (<= pkts_n).
  */
-uint16_t
-mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+static inline uint16_t
+txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
+	       uint16_t pkts_n)
 {
-	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -1585,6 +1585,47 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 }
 
 /**
+ * DPDK callback for TX with Enhanced MPW support.
+ *
+ * @param dpdk_txq
+ *   Generic pointer to TX queue structure.
+ * @param[in] pkts
+ *   Packets to transmit.
+ * @param pkts_n
+ *   Number of packets in array.
+ *
+ * @return
+ *   Number of packets successfully transmitted (<= pkts_n).
+ */
+uint16_t
+mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
+	uint16_t nb_tx = 0;
+
+	while (pkts_n > nb_tx) {
+		uint16_t n;
+		uint16_t ret;
+
+		n = txq_count_contig_multi_seg(&pkts[nb_tx], pkts_n - nb_tx);
+		if (n) {
+			ret = mlx5_tx_burst(dpdk_txq, &pkts[nb_tx], n);
+			if (!ret)
+				break;
+			nb_tx += ret;
+		}
+		n = txq_count_contig_single_seg(&pkts[nb_tx], pkts_n - nb_tx);
+		if (n) {
+			ret = txq_burst_empw(txq, &pkts[nb_tx], n);
+			if (!ret)
+				break;
+			nb_tx += ret;
+		}
+	}
+	return nb_tx;
+}
+
+/**
  * Translate RX completion flags to packet type.
  *
  * @param[in] cqe
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index c072169f2..e70d52361 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -652,4 +652,54 @@ txq_ol_cksum_to_cs(struct mlx5_txq_data *txq_data, struct rte_mbuf *buf)
 	return cs_flags;
 }
 
+/**
+ * Count the number of contiguous single segment packets.
+ *
+ * @param pkts
+ *   Pointer to array of packets.
+ * @param pkts_n
+ *   Number of packets.
+ *
+ * @return
+ *   Number of contiguous single segment packets.
+ */
+static __rte_always_inline unsigned int
+txq_count_contig_single_seg(struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+	unsigned int pos;
+
+	if (!pkts_n)
+		return 0;
+	/* Count the number of contiguous single segment packets. */
+	for (pos = 0; pos < pkts_n; ++pos)
+		if (NB_SEGS(pkts[pos]) > 1)
+			break;
+	return pos;
+}
+
+/**
+ * Count the number of contiguous multi-segment packets.
+ *
+ * @param pkts
+ *   Pointer to array of packets.
+ * @param pkts_n
+ *   Number of packets.
+ *
+ * @return
+ *   Number of contiguous multi-segment packets.
+ */
+static __rte_always_inline unsigned int
+txq_count_contig_multi_seg(struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+	unsigned int pos;
+
+	if (!pkts_n)
+		return 0;
+	/* Count the number of contiguous multi-segment packets. */
+	for (pos = 0; pos < pkts_n; ++pos)
+		if (NB_SEGS(pkts[pos]) == 1)
+			break;
+	return pos;
+}
+
 #endif /* RTE_PMD_MLX5_RXTX_H_ */
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index 3aca17cb4..8d23dae7e 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -68,31 +68,6 @@
 #endif
 
 /**
- * Count the number of continuous single segment packets.
- *
- * @param pkts
- *   Pointer to array of packets.
- * @param pkts_n
- *   Number of packets.
- *
- * @return
- *   Number of continuous single segment packets.
- */
-static inline unsigned int
-txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
-{
-	unsigned int pos;
-
-	if (!pkts_n)
-		return 0;
-	/* Count the number of continuous single segment packets. */
-	for (pos = 0; pos < pkts_n; ++pos)
-		if (NB_SEGS(pkts[pos]) > 1)
-			break;
-	return pos;
-}
-
-/**
  * Count the number of packets having same ol_flags and calculate cs_flags.
  *
  * @param txq
@@ -192,7 +167,7 @@ mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 					       pkts_n - nb_tx);
 		n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
 		if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS))
-			n = txq_check_multiseg(&pkts[nb_tx], n);
+			n = txq_count_contig_single_seg(&pkts[nb_tx], n);
 		if (!(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
 			n = txq_calc_offload(txq, &pkts[nb_tx], n, &cs_flags);
 		ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index 661fbf11c..c5d5b0519 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -222,7 +222,7 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
  * Send burst of packets with Enhanced MPW. If it encounters a multi-seg packet,
  * it returns to make it processed by txq_scatter_v(). All the packets in
  * the pkts list should be single segment packets having same offload flags.
- * This must be checked by txq_check_multiseg() and txq_calc_offload().
+ * This must be checked by txq_count_contig_single_seg() and txq_calc_offload().
  *
  * @param txq
  *   Pointer to TX queue structure.
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 2b0e62861..0dd8145bc 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -223,7 +223,7 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
  * Send burst of packets with Enhanced MPW. If it encounters a multi-seg packet,
  * it returns to make it processed by txq_scatter_v(). All the packets in
  * the pkts list should be single segment packets having same offload flags.
- * This must be checked by txq_check_multiseg() and txq_calc_offload().
+ * This must be checked by txq_count_contig_single_seg() and txq_calc_offload().
  *
  * @param txq
  *   Pointer to TX queue structure.
-- 
2.11.0


* [dpdk-dev] [PATCH 5/5] net/mlx5: clean up multi-segment packet processing
  2017-12-27  3:55 [dpdk-dev] [PATCH 1/5] net/mlx5: enable inlining data from multiple segments Yongseok Koh
                   ` (2 preceding siblings ...)
  2017-12-27  3:55 ` [dpdk-dev] [PATCH 4/5] net/mlx5: add fallback in Tx for multi-segment packet Yongseok Koh
@ 2017-12-27  3:55 ` Yongseok Koh
  2017-12-27  6:06 ` [dpdk-dev] [PATCH 1/5] net/mlx5: enable inlining data from multiple segments Shahaf Shuler
  4 siblings, 0 replies; 6+ messages in thread
From: Yongseok Koh @ 2017-12-27  3:55 UTC (permalink / raw)
  To: adrien.mazarguil, nelio.laranjeiro; +Cc: dev, Yongseok Koh

Remove multi-segment packet handling from mlx5_tx_burst_empw() as such
packets now fall back to the regular Tx path.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 111 ++++++++++---------------------------------
 1 file changed, 26 insertions(+), 85 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index d5e32b845..67e3db168 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1351,21 +1351,13 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
 		unsigned int n;
 		unsigned int do_inline = 0; /* Whether inline is possible. */
 		uint32_t length;
-		unsigned int segs_n = buf->nb_segs;
 		uint8_t cs_flags;
 
-		/*
-		 * Make sure there is enough room to store this packet and
-		 * that one ring entry remains unused.
-		 */
-		assert(segs_n);
-		if (max_elts - j < segs_n)
+		/* Multi-segmented packet is handled in slow-path outside. */
+		assert(NB_SEGS(buf) == 1);
+		/* Make sure there is enough room to store this packet. */
+		if (max_elts - j == 0)
 			break;
-		/* Do not bother with large packets MPW cannot handle. */
-		if (segs_n > MLX5_MPW_DSEG_MAX) {
-			txq->stats.oerrors++;
-			break;
-		}
 		cs_flags = txq_ol_cksum_to_cs(txq, buf);
 		/* Retrieve packet information. */
 		length = PKT_LEN(buf);
@@ -1374,50 +1366,35 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
 		 * - no space left even for a dseg
 		 * - next packet can be inlined with a new WQE
 		 * - cs_flag differs
-		 * It can't be MLX5_MPW_STATE_OPENED as always have a single
-		 * segmented packet.
 		 */
 		if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) {
-			if ((segs_n != 1) ||
-			    (inl_pad + sizeof(struct mlx5_wqe_data_seg) >
-			      mpw_room) ||
+			if ((inl_pad + sizeof(struct mlx5_wqe_data_seg) >
+			     mpw_room) ||
 			    (length <= txq->inline_max_packet_sz &&
 			     inl_pad + sizeof(inl_hdr) + length >
-			      mpw_room) ||
+			     mpw_room) ||
 			    (mpw.wqe->eseg.cs_flags != cs_flags))
 				max_wqe -= mlx5_empw_close(txq, &mpw);
 		}
 		if (unlikely(mpw.state == MLX5_MPW_STATE_CLOSED)) {
-			if (unlikely(segs_n != 1)) {
-				/* Fall back to legacy MPW.
-				 * A MPW session consumes 2 WQEs at most to
-				 * include MLX5_MPW_DSEG_MAX pointers.
-				 */
-				if (unlikely(max_wqe < 2))
-					break;
-				mlx5_mpw_new(txq, &mpw, length);
-			} else {
-				/* In Enhanced MPW, inline as much as the budget
-				 * is allowed. The remaining space is to be
-				 * filled with dsegs. If the title WQEBB isn't
-				 * padded, it will have 2 dsegs there.
-				 */
-				mpw_room = RTE_MIN(MLX5_WQE_SIZE_MAX,
-					    (max_inline ? max_inline :
-					     pkts_n * MLX5_WQE_DWORD_SIZE) +
-					    MLX5_WQE_SIZE);
-				if (unlikely(max_wqe * MLX5_WQE_SIZE <
-					      mpw_room))
-					break;
-				/* Don't pad the title WQEBB to not waste WQ. */
-				mlx5_empw_new(txq, &mpw, 0);
-				mpw_room -= mpw.total_len;
-				inl_pad = 0;
-				do_inline =
-					length <= txq->inline_max_packet_sz &&
-					sizeof(inl_hdr) + length <= mpw_room &&
-					!txq->mpw_hdr_dseg;
-			}
+			/* In Enhanced MPW, inline as much as the budget is
+			 * allowed. The remaining space is to be filled with
+			 * dsegs. If the title WQEBB isn't padded, it will have
+			 * 2 dsegs there.
+			 */
+			mpw_room = RTE_MIN(MLX5_WQE_SIZE_MAX,
+					   (max_inline ? max_inline :
+					    pkts_n * MLX5_WQE_DWORD_SIZE) +
+					   MLX5_WQE_SIZE);
+			if (unlikely(max_wqe * MLX5_WQE_SIZE < mpw_room))
+				break;
+			/* Don't pad the title WQEBB to not waste WQ. */
+			mlx5_empw_new(txq, &mpw, 0);
+			mpw_room -= mpw.total_len;
+			inl_pad = 0;
+			do_inline = length <= txq->inline_max_packet_sz &&
+				    sizeof(inl_hdr) + length <= mpw_room &&
+				    !txq->mpw_hdr_dseg;
 			mpw.wqe->eseg.cs_flags = cs_flags;
 		} else {
 			/* Evaluate whether the next packet can be inlined.
@@ -1433,41 +1410,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
 				(!txq->mpw_hdr_dseg ||
 				 mpw.total_len >= MLX5_WQE_SIZE);
 		}
-		/* Multi-segment packets must be alone in their MPW. */
-		assert((segs_n == 1) || (mpw.pkts_n == 0));
-		if (unlikely(mpw.state == MLX5_MPW_STATE_OPENED)) {
-#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
-			length = 0;
-#endif
-			do {
-				volatile struct mlx5_wqe_data_seg *dseg;
-
-				assert(buf);
-				(*txq->elts)[elts_head++ & elts_m] = buf;
-				dseg = mpw.data.dseg[mpw.pkts_n];
-				addr = rte_pktmbuf_mtod(buf, uintptr_t);
-				*dseg = (struct mlx5_wqe_data_seg){
-					.byte_count = rte_cpu_to_be_32(
-								DATA_LEN(buf)),
-					.lkey = mlx5_tx_mb2mr(txq, buf),
-					.addr = rte_cpu_to_be_64(addr),
-				};
-#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
-				length += DATA_LEN(buf);
-#endif
-				buf = buf->next;
-				++j;
-				++mpw.pkts_n;
-			} while (--segs_n);
-			/* A multi-segmented packet takes one MPW session.
-			 * TODO: Pack more multi-segmented packets if possible.
-			 */
-			mlx5_mpw_close(txq, &mpw);
-			if (mpw.pkts_n < 3)
-				max_wqe--;
-			else
-				max_wqe -= 2;
-		} else if (do_inline) {
+		if (do_inline) {
 			/* Inline packet into WQE. */
 			unsigned int max;
 
@@ -1576,8 +1519,6 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
 #endif
 	if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED)
 		mlx5_empw_close(txq, &mpw);
-	else if (mpw.state == MLX5_MPW_STATE_OPENED)
-		mlx5_mpw_close(txq, &mpw);
 	/* Ring QP doorbell. */
 	mlx5_tx_dbrec(txq, mpw.wqe);
 	txq->elts_head = elts_head;
-- 
2.11.0


* Re: [dpdk-dev] [PATCH 1/5] net/mlx5: enable inlining data from multiple segments
  2017-12-27  3:55 [dpdk-dev] [PATCH 1/5] net/mlx5: enable inlining data from multiple segments Yongseok Koh
                   ` (3 preceding siblings ...)
  2017-12-27  3:55 ` [dpdk-dev] [PATCH 5/5] net/mlx5: clean up multi-segment packet processing Yongseok Koh
@ 2017-12-27  6:06 ` Shahaf Shuler
  4 siblings, 0 replies; 6+ messages in thread
From: Shahaf Shuler @ 2017-12-27  6:06 UTC (permalink / raw)
  To: Yongseok Koh, Adrien Mazarguil, Nélio Laranjeiro; +Cc: dev, Yongseok Koh

Wednesday, December 27, 2017 5:56 AM, Yongseok Koh:
> 
> mlx5_tx_burst() doesn't inline data from the 2nd segment. If there's still
> enough room in the descriptor after inlining the 1st segment, further inlining
> from the 2nd segment would be beneficial to save PCIe bandwidth.
> 
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5_rxtx.c | 44 +++++++++++++++++++++++++-----------
> --------
>  1 file changed, 25 insertions(+), 19 deletions(-)
> 

Series applied to next-net-mlx, thanks.

