DPDK patches and discussions
* [PATCH] common/idpf: rework single queue Tx function
@ 2023-08-25  7:21 Simei Su
  2023-08-25  7:48 ` Wu, Wenjun1
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Simei Su @ 2023-08-25  7:21 UTC (permalink / raw)
  To: jingjing.wu, beilei.xing, qi.z.zhang; +Cc: dev, wenjun1.wu, Simei Su

This patch replaces flex Tx descriptor structure with base Tx descriptor.

Signed-off-by: Simei Su <simei.su@intel.com>
---
 drivers/common/idpf/idpf_common_rxtx.c        | 72 +++++++++++++------
 drivers/common/idpf/idpf_common_rxtx.h        |  2 +-
 drivers/common/idpf/idpf_common_rxtx_avx512.c | 36 +++++-----
 drivers/net/idpf/idpf_rxtx.c                  |  2 +-
 4 files changed, 69 insertions(+), 43 deletions(-)

diff --git a/drivers/common/idpf/idpf_common_rxtx.c b/drivers/common/idpf/idpf_common_rxtx.c
index fc87e3e243..67c124a614 100644
--- a/drivers/common/idpf/idpf_common_rxtx.c
+++ b/drivers/common/idpf/idpf_common_rxtx.c
@@ -276,14 +276,14 @@ idpf_qc_single_tx_queue_reset(struct idpf_tx_queue *txq)
 	}
 
 	txe = txq->sw_ring;
-	size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
+	size = sizeof(struct idpf_base_tx_desc) * txq->nb_tx_desc;
 	for (i = 0; i < size; i++)
 		((volatile char *)txq->tx_ring)[i] = 0;
 
 	prev = (uint16_t)(txq->nb_tx_desc - 1);
 	for (i = 0; i < txq->nb_tx_desc; i++) {
-		txq->tx_ring[i].qw1.cmd_dtype =
-			rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
+		txq->tx_ring[i].qw1 =
+			rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
 		txe[i].mbuf =  NULL;
 		txe[i].last_id = i;
 		txe[prev].next_id = i;
@@ -823,6 +823,33 @@ idpf_calc_context_desc(uint64_t flags)
 	return 0;
 }
 
+/* set TSO context descriptor for single queue
+ */
+static inline void
+idpf_set_singleq_tso_ctx(struct rte_mbuf *mbuf,
+			union idpf_tx_offload tx_offload,
+			volatile struct idpf_base_tx_ctx_desc *ctx_desc)
+{
+	uint16_t cmd_dtype;
+	uint32_t tso_len;
+	uint8_t hdr_len;
+
+	if (tx_offload.l4_len == 0) {
+		TX_LOG(DEBUG, "L4 length set to 0");
+		return;
+	}
+
+	hdr_len = tx_offload.l2_len +
+		tx_offload.l3_len +
+		tx_offload.l4_len;
+	cmd_dtype = IDPF_TX_CTX_DESC_TSO;
+	tso_len = mbuf->pkt_len - hdr_len;
+
+	ctx_desc->qw1 |= ((uint64_t)cmd_dtype << IDPF_TXD_CTX_QW1_CMD_S) |
+		((uint64_t)tso_len << IDPF_TXD_CTX_QW1_TSO_LEN_S) |
+		((uint64_t)mbuf->tso_segsz << IDPF_TXD_CTX_QW1_MSS_S);
+}
+
 /* set TSO context descriptor
  */
 static inline void
@@ -1307,17 +1334,16 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
 	uint16_t nb_tx_to_clean;
 	uint16_t i;
 
-	volatile struct idpf_flex_tx_desc *txd = txq->tx_ring;
+	volatile struct idpf_base_tx_desc *txd = txq->tx_ring;
 
 	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
 	if (desc_to_clean_to >= nb_tx_desc)
 		desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
 
 	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
-	/* In the writeback Tx desccriptor, the only significant fields are the 4-bit DTYPE */
-	if ((txd[desc_to_clean_to].qw1.cmd_dtype &
-	     rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) !=
-	    rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
+	if ((txd[desc_to_clean_to].qw1 &
+	     rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
+	    rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
 		TX_LOG(DEBUG, "TX descriptor %4u is not done "
 		       "(port=%d queue=%d)", desc_to_clean_to,
 		       txq->port_id, txq->queue_id);
@@ -1331,10 +1357,7 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
 		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
 					    last_desc_cleaned);
 
-	txd[desc_to_clean_to].qw1.cmd_dtype = 0;
-	txd[desc_to_clean_to].qw1.buf_size = 0;
-	for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++)
-		txd[desc_to_clean_to].qw1.flex.raw[i] = 0;
+	txd[desc_to_clean_to].qw1 = 0;
 
 	txq->last_desc_cleaned = desc_to_clean_to;
 	txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean);
@@ -1347,8 +1370,8 @@ uint16_t
 idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			  uint16_t nb_pkts)
 {
-	volatile struct idpf_flex_tx_desc *txd;
-	volatile struct idpf_flex_tx_desc *txr;
+	volatile struct idpf_base_tx_desc *txd;
+	volatile struct idpf_base_tx_desc *txr;
 	union idpf_tx_offload tx_offload = {0};
 	struct idpf_tx_entry *txe, *txn;
 	struct idpf_tx_entry *sw_ring;
@@ -1356,6 +1379,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct rte_mbuf *tx_pkt;
 	struct rte_mbuf *m_seg;
 	uint64_t buf_dma_addr;
+	uint32_t td_offset;
 	uint64_t ol_flags;
 	uint16_t tx_last;
 	uint16_t nb_used;
@@ -1382,6 +1406,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
 		td_cmd = 0;
+		td_offset = 0;
 
 		tx_pkt = *tx_pkts++;
 		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
@@ -1428,8 +1453,8 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 		if (nb_ctx != 0) {
 			/* Setup TX context descriptor if required */
-			volatile union idpf_flex_tx_ctx_desc *ctx_txd =
-				(volatile union idpf_flex_tx_ctx_desc *)
+			volatile struct idpf_base_tx_ctx_desc *ctx_txd =
+				(volatile struct idpf_base_tx_ctx_desc *)
 				&txr[tx_id];
 
 			txn = &sw_ring[txe->next_id];
@@ -1441,7 +1466,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 			/* TSO enabled */
 			if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
-				idpf_set_splitq_tso_ctx(tx_pkt, tx_offload,
+				idpf_set_singleq_tso_ctx(tx_pkt, tx_offload,
 							ctx_txd);
 
 			txe->last_id = tx_last;
@@ -1462,9 +1487,10 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			slen = m_seg->data_len;
 			buf_dma_addr = rte_mbuf_data_iova(m_seg);
 			txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
-			txd->qw1.buf_size = slen;
-			txd->qw1.cmd_dtype = rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
-							      IDPF_FLEX_TXD_QW1_DTYPE_S);
+			txd->qw1 = rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DATA |
+				((uint64_t)td_cmd  << IDPF_TXD_QW1_CMD_S) |
+				((uint64_t)td_offset << IDPF_TXD_QW1_OFFSET_S) |
+				((uint64_t)slen << IDPF_TXD_QW1_TX_BUF_SZ_S));
 
 			txe->last_id = tx_last;
 			tx_id = txe->next_id;
@@ -1473,7 +1499,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		} while (m_seg);
 
 		/* The last packet data descriptor needs End Of Packet (EOP) */
-		td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP;
+		td_cmd |= IDPF_TX_DESC_CMD_EOP;
 		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
 		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
 
@@ -1482,7 +1508,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			       "%4u (port=%d queue=%d)",
 			       tx_last, txq->port_id, txq->queue_id);
 
-			td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS;
+			td_cmd |= IDPF_TX_DESC_CMD_RS;
 
 			/* Update txq RS bit counters */
 			txq->nb_used = 0;
@@ -1491,7 +1517,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
 			td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
 
-		txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd << IDPF_FLEX_TXD_QW1_CMD_S);
+		txd->qw1 |= rte_cpu_to_le_16(td_cmd << IDPF_TXD_QW1_CMD_S);
 	}
 
 end_of_tx:
diff --git a/drivers/common/idpf/idpf_common_rxtx.h b/drivers/common/idpf/idpf_common_rxtx.h
index 6cb83fc0a6..b49b1ed737 100644
--- a/drivers/common/idpf/idpf_common_rxtx.h
+++ b/drivers/common/idpf/idpf_common_rxtx.h
@@ -157,7 +157,7 @@ struct idpf_tx_entry {
 /* Structure associated with each TX queue. */
 struct idpf_tx_queue {
 	const struct rte_memzone *mz;		/* memzone for Tx ring */
-	volatile struct idpf_flex_tx_desc *tx_ring;	/* Tx ring virtual address */
+	volatile struct idpf_base_tx_desc *tx_ring;	/* Tx ring virtual address */
 	volatile union {
 		struct idpf_flex_tx_sched_desc *desc_ring;
 		struct idpf_splitq_tx_compl_desc *compl_ring;
diff --git a/drivers/common/idpf/idpf_common_rxtx_avx512.c b/drivers/common/idpf/idpf_common_rxtx_avx512.c
index 81312617cc..36ff2af966 100644
--- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
@@ -1005,7 +1005,7 @@ idpf_tx_singleq_free_bufs_avx512(struct idpf_tx_queue *txq)
 	struct rte_mbuf *m, *free[txq->rs_thresh];
 
 	/* check DD bits on threshold descriptor */
-	if ((txq->tx_ring[txq->next_dd].qw1.cmd_dtype &
+	if ((txq->tx_ring[txq->next_dd].qw1 &
 			rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
 			rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE))
 		return 0;
@@ -1115,13 +1115,13 @@ tx_backlog_entry_avx512(struct idpf_tx_vec_entry *txep,
 
 #define IDPF_FLEX_TXD_QW1_BUF_SZ_S 48
 static __rte_always_inline void
-idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
+idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,
 	  struct rte_mbuf *pkt, uint64_t flags)
 {
 	uint64_t high_qw =
-		(IDPF_TX_DESC_DTYPE_FLEX_DATA << IDPF_FLEX_TXD_QW1_DTYPE_S |
-		 ((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S) |
-		 ((uint64_t)pkt->data_len << IDPF_FLEX_TXD_QW1_BUF_SZ_S));
+		(IDPF_TX_DESC_DTYPE_DATA |
+		 ((uint64_t)flags  << IDPF_TXD_QW1_CMD_S) |
+		 ((uint64_t)pkt->data_len << IDPF_TXD_QW1_TX_BUF_SZ_S));
 
 	__m128i descriptor = _mm_set_epi64x(high_qw,
 					    pkt->buf_iova + pkt->data_off);
@@ -1131,11 +1131,11 @@ idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
 #define IDPF_TX_LEN_MASK 0xAA
 #define IDPF_TX_OFF_MASK 0x55
 static __rte_always_inline void
-idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
+idpf_singleq_vtx(volatile struct idpf_base_tx_desc *txdp,
 	 struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
 {
-	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_FLEX_DATA  |
-			((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S));
+	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA  |
+			((uint64_t)flags  << IDPF_TXD_QW1_CMD_S));
 
 	/* if unaligned on 32-bit boundary, do one to align */
 	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
@@ -1148,19 +1148,19 @@ idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
 		uint64_t hi_qw3 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[3]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw2 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[2]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw1 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[1]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw0 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[0]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 
 		__m512i desc0_3 =
 			_mm512_set_epi64
@@ -1187,11 +1187,11 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
 					 uint16_t nb_pkts)
 {
 	struct idpf_tx_queue *txq = tx_queue;
-	volatile struct idpf_flex_tx_desc *txdp;
+	volatile struct idpf_base_tx_desc *txdp;
 	struct idpf_tx_vec_entry *txep;
 	uint16_t n, nb_commit, tx_id;
-	uint64_t flags = IDPF_TX_FLEX_DESC_CMD_EOP;
-	uint64_t rs = IDPF_TX_FLEX_DESC_CMD_RS | flags;
+	uint64_t flags = IDPF_TX_DESC_CMD_EOP;
+	uint64_t rs = IDPF_TX_DESC_CMD_RS | flags;
 
 	/* cross rx_thresh boundary is not allowed */
 	nb_pkts = RTE_MIN(nb_pkts, txq->rs_thresh);
@@ -1238,9 +1238,9 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
 
 	tx_id = (uint16_t)(tx_id + nb_commit);
 	if (tx_id > txq->next_rs) {
-		txq->tx_ring[txq->next_rs].qw1.cmd_dtype |=
-			rte_cpu_to_le_64(((uint64_t)IDPF_TX_FLEX_DESC_CMD_RS) <<
-					 IDPF_FLEX_TXD_QW1_CMD_S);
+		txq->tx_ring[txq->next_rs].qw1 |=
+			rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
+					 IDPF_TXD_QW1_CMD_S);
 		txq->next_rs =
 			(uint16_t)(txq->next_rs + txq->rs_thresh);
 	}
diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
index 3e3d81ca6d..64f2235580 100644
--- a/drivers/net/idpf/idpf_rxtx.c
+++ b/drivers/net/idpf/idpf_rxtx.c
@@ -74,7 +74,7 @@ idpf_dma_zone_reserve(struct rte_eth_dev *dev, uint16_t queue_idx,
 			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_sched_desc),
 					      IDPF_DMA_MEM_ALIGN);
 		else
-			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_desc),
+			ring_size = RTE_ALIGN(len * sizeof(struct idpf_base_tx_desc),
 					      IDPF_DMA_MEM_ALIGN);
 		rte_memcpy(ring_name, "idpf Tx ring", sizeof("idpf Tx ring"));
 		break;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 16+ messages in thread
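
For reference, a minimal, self-contained sketch of the single 64-bit qw1 composition the data descriptors move to in the patch above. The shift values below are placeholders for illustration only; the real IDPF_TXD_QW1_*_S definitions live in the idpf base headers and may differ.

#include <stdint.h>

/* Placeholder shift values -- substitute the IDPF_TXD_QW1_*_S
 * definitions from the idpf base headers in real code. */
#define QW1_DTYPE_DATA       0ULL  /* assumed base data descriptor DTYPE */
#define QW1_CMD_S            4     /* command bits (EOP, RS, ...) */
#define QW1_OFFSET_S         16    /* header offsets */
#define QW1_TX_BUF_SZ_S      34    /* buffer size */

/* Base Tx data descriptor layout implied by the diff: a buffer address
 * plus one 64-bit qw1 packing DTYPE, command, offset and buffer size. */
struct base_tx_desc_sketch {
	uint64_t buf_addr;
	uint64_t qw1;
};

/* Compose qw1 the same way the reworked single-queue data path does. */
static inline uint64_t
base_txd_qw1(uint64_t td_cmd, uint64_t td_offset, uint64_t buf_sz)
{
	return QW1_DTYPE_DATA |
	       (td_cmd << QW1_CMD_S) |
	       (td_offset << QW1_OFFSET_S) |
	       (buf_sz << QW1_TX_BUF_SZ_S);
}

With the flex descriptor, cmd_dtype, buf_size and the raw flex words were separate fields; packing everything into a single quadword is what lets the cleanup path above simply test and clear qw1 as a whole.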

* RE: [PATCH] common/idpf: rework single queue Tx function
  2023-08-25  7:21 [PATCH] common/idpf: rework single queue Tx function Simei Su
@ 2023-08-25  7:48 ` Wu, Wenjun1
  2023-08-25  8:14 ` Zhang, Qi Z
  2023-09-04  7:02 ` [PATCH v2] common/idpf: refactor " Simei Su
  2 siblings, 0 replies; 16+ messages in thread
From: Wu, Wenjun1 @ 2023-08-25  7:48 UTC (permalink / raw)
  To: Su, Simei, Wu, Jingjing, Xing, Beilei, Zhang, Qi Z; +Cc: dev



> -----Original Message-----
> From: Su, Simei <simei.su@intel.com>
> Sent: Friday, August 25, 2023 3:21 PM
> To: Wu, Jingjing <jingjing.wu@intel.com>; Xing, Beilei <beilei.xing@intel.com>;
> Zhang, Qi Z <qi.z.zhang@intel.com>
> Cc: dev@dpdk.org; Wu, Wenjun1 <wenjun1.wu@intel.com>; Su, Simei
> <simei.su@intel.com>
> Subject: [PATCH] common/idpf: rework single queue Tx function
> 
> This patch replaces flex Tx descriptor structure with base Tx descriptor.
> 
> Signed-off-by: Simei Su <simei.su@intel.com>
> ---
>  drivers/common/idpf/idpf_common_rxtx.c        | 72 +++++++++++++------
>  drivers/common/idpf/idpf_common_rxtx.h        |  2 +-
>  drivers/common/idpf/idpf_common_rxtx_avx512.c | 36 +++++-----
>  drivers/net/idpf/idpf_rxtx.c                  |  2 +-
>  4 files changed, 69 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/common/idpf/idpf_common_rxtx.c
> b/drivers/common/idpf/idpf_common_rxtx.c
> index fc87e3e243..67c124a614 100644
> --- a/drivers/common/idpf/idpf_common_rxtx.c
> +++ b/drivers/common/idpf/idpf_common_rxtx.c
> @@ -276,14 +276,14 @@ idpf_qc_single_tx_queue_reset(struct
> idpf_tx_queue *txq)
>  	}
> 
>  	txe = txq->sw_ring;
> -	size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
> +	size = sizeof(struct idpf_base_tx_desc) * txq->nb_tx_desc;
>  	for (i = 0; i < size; i++)
>  		((volatile char *)txq->tx_ring)[i] = 0;
> 
>  	prev = (uint16_t)(txq->nb_tx_desc - 1);
>  	for (i = 0; i < txq->nb_tx_desc; i++) {
> -		txq->tx_ring[i].qw1.cmd_dtype =
> -
> 	rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
> +		txq->tx_ring[i].qw1 =
> +
> 	rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
>  		txe[i].mbuf =  NULL;
>  		txe[i].last_id = i;
>  		txe[prev].next_id = i;
> @@ -823,6 +823,33 @@ idpf_calc_context_desc(uint64_t flags)
>  	return 0;
>  }
> 
> +/* set TSO context descriptor for single queue  */ static inline void
> +idpf_set_singleq_tso_ctx(struct rte_mbuf *mbuf,
> +			union idpf_tx_offload tx_offload,
> +			volatile struct idpf_base_tx_ctx_desc *ctx_desc) {
> +	uint16_t cmd_dtype;
> +	uint32_t tso_len;
> +	uint8_t hdr_len;
> +
> +	if (tx_offload.l4_len == 0) {
> +		TX_LOG(DEBUG, "L4 length set to 0");
> +		return;
> +	}
> +
> +	hdr_len = tx_offload.l2_len +
> +		tx_offload.l3_len +
> +		tx_offload.l4_len;
> +	cmd_dtype = IDPF_TX_CTX_DESC_TSO;

The cmd_dtype for the base-mode context TSO descriptor should be:
cmd_dtype = IDPF_TX_DESC_DTYPE_CTX | IDPF_TX_CTX_DESC_TSO << IDPF_TXD_QW1_CMD_S;

> +	tso_len = mbuf->pkt_len - hdr_len;
> +
> +	ctx_desc->qw1 |= ((uint64_t)cmd_dtype <<
> IDPF_TXD_CTX_QW1_CMD_S) |
> +		((uint64_t)tso_len << IDPF_TXD_CTX_QW1_TSO_LEN_S) |
> +		((uint64_t)mbuf->tso_segsz <<
> IDPF_TXD_CTX_QW1_MSS_S); }

It seems better to add a mask (&) here so the shifted values cannot cross into the adjacent fields.
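
For reference, a rough sketch of what the masked composition could look like (the shift and mask values here are placeholders for illustration; the real IDPF_TX_DESC_DTYPE_CTX and IDPF_TXD_CTX_QW1_* definitions come from the idpf base headers). The v2 revision later in this thread adopts this shape in idpf_set_singleq_tso_ctx():

#include <stdint.h>

/* Placeholder values -- substitute the IDPF_* definitions from the
 * idpf base headers in real code. */
#define DTYPE_CTX            1ULL
#define CTX_DESC_TSO         1ULL
#define CTX_QW1_CMD_S        4
#define CTX_QW1_TSO_LEN_S    30
#define CTX_QW1_TSO_LEN_M    (0x3FFFFULL << CTX_QW1_TSO_LEN_S)
#define CTX_QW1_MSS_S        50
#define CTX_QW1_MSS_M        (0x3FFFULL << CTX_QW1_MSS_S)

/* Start from the CTX dtype, OR in the TSO command, then the masked TSO
 * length and MSS so neither value can spill into a neighbouring field. */
static inline uint64_t
ctx_tso_qw1(uint32_t tso_len, uint16_t mss)
{
	uint64_t qw1 = DTYPE_CTX;

	qw1 |= CTX_DESC_TSO << CTX_QW1_CMD_S;
	qw1 |= ((uint64_t)tso_len << CTX_QW1_TSO_LEN_S) & CTX_QW1_TSO_LEN_M;
	qw1 |= ((uint64_t)mss << CTX_QW1_MSS_S) & CTX_QW1_MSS_M;
	return qw1;
}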

>...

Regards,
Wenjun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH] common/idpf: rework single queue Tx function
  2023-08-25  7:21 [PATCH] common/idpf: rework single queue Tx function Simei Su
  2023-08-25  7:48 ` Wu, Wenjun1
@ 2023-08-25  8:14 ` Zhang, Qi Z
  2023-09-04  7:02 ` [PATCH v2] common/idpf: refactor " Simei Su
  2 siblings, 0 replies; 16+ messages in thread
From: Zhang, Qi Z @ 2023-08-25  8:14 UTC (permalink / raw)
  To: Su, Simei, Wu, Jingjing, Xing, Beilei; +Cc: dev, Wu, Wenjun1



> -----Original Message-----
> From: Su, Simei <simei.su@intel.com>
> Sent: Friday, August 25, 2023 3:21 PM
> To: Wu, Jingjing <jingjing.wu@intel.com>; Xing, Beilei <beilei.xing@intel.com>;
> Zhang, Qi Z <qi.z.zhang@intel.com>
> Cc: dev@dpdk.org; Wu, Wenjun1 <wenjun1.wu@intel.com>; Su, Simei
> <simei.su@intel.com>
> Subject: [PATCH] common/idpf: rework single queue Tx function
> 
> This patch replaces flex Tx descriptor structure with base Tx descriptor.

Could you add some description of why we need this?

For example:
- it could be a fix for some bug,
- a precondition for enabling a new feature,
- or just a code refactor to align with the kernel driver implementation.
...


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v2] common/idpf: refactor single queue Tx function
  2023-08-25  7:21 [PATCH] common/idpf: rework single queue Tx function Simei Su
  2023-08-25  7:48 ` Wu, Wenjun1
  2023-08-25  8:14 ` Zhang, Qi Z
@ 2023-09-04  7:02 ` Simei Su
  2023-09-08 10:28   ` [PATCH v3] " Simei Su
  2 siblings, 1 reply; 16+ messages in thread
From: Simei Su @ 2023-09-04  7:02 UTC (permalink / raw)
  To: jingjing.wu, beilei.xing, qi.z.zhang; +Cc: dev, wenjun1.wu, Simei Su

This patch replaces flex Tx descriptor with base Tx descriptor to align
with kernel driver practice.

Signed-off-by: Simei Su <simei.su@intel.com>
---
v2:
* Refine commit title and commit log.
* Remove redundant definition.
* Modify base mode context TSO descriptor.

 drivers/common/idpf/idpf_common_rxtx.c        | 76 +++++++++++++------
 drivers/common/idpf/idpf_common_rxtx.h        |  2 +-
 drivers/common/idpf/idpf_common_rxtx_avx512.c | 37 +++++----
 drivers/net/idpf/idpf_rxtx.c                  |  2 +-
 4 files changed, 73 insertions(+), 44 deletions(-)

diff --git a/drivers/common/idpf/idpf_common_rxtx.c b/drivers/common/idpf/idpf_common_rxtx.c
index fc87e3e243..01a8685ea3 100644
--- a/drivers/common/idpf/idpf_common_rxtx.c
+++ b/drivers/common/idpf/idpf_common_rxtx.c
@@ -276,14 +276,14 @@ idpf_qc_single_tx_queue_reset(struct idpf_tx_queue *txq)
 	}
 
 	txe = txq->sw_ring;
-	size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
+	size = sizeof(struct idpf_base_tx_desc) * txq->nb_tx_desc;
 	for (i = 0; i < size; i++)
 		((volatile char *)txq->tx_ring)[i] = 0;
 
 	prev = (uint16_t)(txq->nb_tx_desc - 1);
 	for (i = 0; i < txq->nb_tx_desc; i++) {
-		txq->tx_ring[i].qw1.cmd_dtype =
-			rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
+		txq->tx_ring[i].qw1 =
+			rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
 		txe[i].mbuf =  NULL;
 		txe[i].last_id = i;
 		txe[prev].next_id = i;
@@ -823,6 +823,37 @@ idpf_calc_context_desc(uint64_t flags)
 	return 0;
 }
 
+/* set TSO context descriptor for single queue
+ */
+static inline void
+idpf_set_singleq_tso_ctx(struct rte_mbuf *mbuf,
+			union idpf_tx_offload tx_offload,
+			volatile struct idpf_base_tx_ctx_desc *ctx_desc)
+{
+	uint32_t tso_len;
+	uint8_t hdr_len;
+	uint64_t qw1;
+
+	if (tx_offload.l4_len == 0) {
+		TX_LOG(DEBUG, "L4 length set to 0");
+		return;
+	}
+
+	hdr_len = tx_offload.l2_len +
+		tx_offload.l3_len +
+		tx_offload.l4_len;
+	tso_len = mbuf->pkt_len - hdr_len;
+	qw1 = (uint64_t)IDPF_TX_DESC_DTYPE_CTX;
+
+	qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S;
+	qw1 |= ((uint64_t)tso_len << IDPF_TXD_CTX_QW1_TSO_LEN_S) &
+		IDPF_TXD_CTX_QW1_TSO_LEN_M;
+	qw1 |= ((uint64_t)mbuf->tso_segsz << IDPF_TXD_CTX_QW1_MSS_S) &
+		IDPF_TXD_CTX_QW1_MSS_M;
+
+	ctx_desc->qw1 = rte_cpu_to_le_64(qw1);
+}
+
 /* set TSO context descriptor
  */
 static inline void
@@ -1307,17 +1338,16 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
 	uint16_t nb_tx_to_clean;
 	uint16_t i;
 
-	volatile struct idpf_flex_tx_desc *txd = txq->tx_ring;
+	volatile struct idpf_base_tx_desc *txd = txq->tx_ring;
 
 	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
 	if (desc_to_clean_to >= nb_tx_desc)
 		desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
 
 	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
-	/* In the writeback Tx desccriptor, the only significant fields are the 4-bit DTYPE */
-	if ((txd[desc_to_clean_to].qw1.cmd_dtype &
-	     rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) !=
-	    rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
+	if ((txd[desc_to_clean_to].qw1 &
+	     rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
+	    rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
 		TX_LOG(DEBUG, "TX descriptor %4u is not done "
 		       "(port=%d queue=%d)", desc_to_clean_to,
 		       txq->port_id, txq->queue_id);
@@ -1331,10 +1361,7 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
 		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
 					    last_desc_cleaned);
 
-	txd[desc_to_clean_to].qw1.cmd_dtype = 0;
-	txd[desc_to_clean_to].qw1.buf_size = 0;
-	for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++)
-		txd[desc_to_clean_to].qw1.flex.raw[i] = 0;
+	txd[desc_to_clean_to].qw1 = 0;
 
 	txq->last_desc_cleaned = desc_to_clean_to;
 	txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean);
@@ -1347,8 +1374,8 @@ uint16_t
 idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			  uint16_t nb_pkts)
 {
-	volatile struct idpf_flex_tx_desc *txd;
-	volatile struct idpf_flex_tx_desc *txr;
+	volatile struct idpf_base_tx_desc *txd;
+	volatile struct idpf_base_tx_desc *txr;
 	union idpf_tx_offload tx_offload = {0};
 	struct idpf_tx_entry *txe, *txn;
 	struct idpf_tx_entry *sw_ring;
@@ -1356,6 +1383,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct rte_mbuf *tx_pkt;
 	struct rte_mbuf *m_seg;
 	uint64_t buf_dma_addr;
+	uint32_t td_offset;
 	uint64_t ol_flags;
 	uint16_t tx_last;
 	uint16_t nb_used;
@@ -1382,6 +1410,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
 		td_cmd = 0;
+		td_offset = 0;
 
 		tx_pkt = *tx_pkts++;
 		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
@@ -1428,8 +1457,8 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 		if (nb_ctx != 0) {
 			/* Setup TX context descriptor if required */
-			volatile union idpf_flex_tx_ctx_desc *ctx_txd =
-				(volatile union idpf_flex_tx_ctx_desc *)
+			volatile struct idpf_base_tx_ctx_desc *ctx_txd =
+				(volatile struct idpf_base_tx_ctx_desc *)
 				&txr[tx_id];
 
 			txn = &sw_ring[txe->next_id];
@@ -1441,7 +1470,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 			/* TSO enabled */
 			if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
-				idpf_set_splitq_tso_ctx(tx_pkt, tx_offload,
+				idpf_set_singleq_tso_ctx(tx_pkt, tx_offload,
 							ctx_txd);
 
 			txe->last_id = tx_last;
@@ -1462,9 +1491,10 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			slen = m_seg->data_len;
 			buf_dma_addr = rte_mbuf_data_iova(m_seg);
 			txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
-			txd->qw1.buf_size = slen;
-			txd->qw1.cmd_dtype = rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
-							      IDPF_FLEX_TXD_QW1_DTYPE_S);
+			txd->qw1 = rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DATA |
+				((uint64_t)td_cmd  << IDPF_TXD_QW1_CMD_S) |
+				((uint64_t)td_offset << IDPF_TXD_QW1_OFFSET_S) |
+				((uint64_t)slen << IDPF_TXD_QW1_TX_BUF_SZ_S));
 
 			txe->last_id = tx_last;
 			tx_id = txe->next_id;
@@ -1473,7 +1503,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		} while (m_seg);
 
 		/* The last packet data descriptor needs End Of Packet (EOP) */
-		td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP;
+		td_cmd |= IDPF_TX_DESC_CMD_EOP;
 		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
 		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
 
@@ -1482,7 +1512,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			       "%4u (port=%d queue=%d)",
 			       tx_last, txq->port_id, txq->queue_id);
 
-			td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS;
+			td_cmd |= IDPF_TX_DESC_CMD_RS;
 
 			/* Update txq RS bit counters */
 			txq->nb_used = 0;
@@ -1491,7 +1521,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
 			td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
 
-		txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd << IDPF_FLEX_TXD_QW1_CMD_S);
+		txd->qw1 |= rte_cpu_to_le_16(td_cmd << IDPF_TXD_QW1_CMD_S);
 	}
 
 end_of_tx:
diff --git a/drivers/common/idpf/idpf_common_rxtx.h b/drivers/common/idpf/idpf_common_rxtx.h
index 6cb83fc0a6..b49b1ed737 100644
--- a/drivers/common/idpf/idpf_common_rxtx.h
+++ b/drivers/common/idpf/idpf_common_rxtx.h
@@ -157,7 +157,7 @@ struct idpf_tx_entry {
 /* Structure associated with each TX queue. */
 struct idpf_tx_queue {
 	const struct rte_memzone *mz;		/* memzone for Tx ring */
-	volatile struct idpf_flex_tx_desc *tx_ring;	/* Tx ring virtual address */
+	volatile struct idpf_base_tx_desc *tx_ring;	/* Tx ring virtual address */
 	volatile union {
 		struct idpf_flex_tx_sched_desc *desc_ring;
 		struct idpf_splitq_tx_compl_desc *compl_ring;
diff --git a/drivers/common/idpf/idpf_common_rxtx_avx512.c b/drivers/common/idpf/idpf_common_rxtx_avx512.c
index 81312617cc..afb0014a13 100644
--- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
@@ -1005,7 +1005,7 @@ idpf_tx_singleq_free_bufs_avx512(struct idpf_tx_queue *txq)
 	struct rte_mbuf *m, *free[txq->rs_thresh];
 
 	/* check DD bits on threshold descriptor */
-	if ((txq->tx_ring[txq->next_dd].qw1.cmd_dtype &
+	if ((txq->tx_ring[txq->next_dd].qw1 &
 			rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
 			rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE))
 		return 0;
@@ -1113,15 +1113,14 @@ tx_backlog_entry_avx512(struct idpf_tx_vec_entry *txep,
 		txep[i].mbuf = tx_pkts[i];
 }
 
-#define IDPF_FLEX_TXD_QW1_BUF_SZ_S 48
 static __rte_always_inline void
-idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
+idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,
 	  struct rte_mbuf *pkt, uint64_t flags)
 {
 	uint64_t high_qw =
-		(IDPF_TX_DESC_DTYPE_FLEX_DATA << IDPF_FLEX_TXD_QW1_DTYPE_S |
-		 ((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S) |
-		 ((uint64_t)pkt->data_len << IDPF_FLEX_TXD_QW1_BUF_SZ_S));
+		(IDPF_TX_DESC_DTYPE_DATA |
+		 ((uint64_t)flags  << IDPF_TXD_QW1_CMD_S) |
+		 ((uint64_t)pkt->data_len << IDPF_TXD_QW1_TX_BUF_SZ_S));
 
 	__m128i descriptor = _mm_set_epi64x(high_qw,
 					    pkt->buf_iova + pkt->data_off);
@@ -1131,11 +1130,11 @@ idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
 #define IDPF_TX_LEN_MASK 0xAA
 #define IDPF_TX_OFF_MASK 0x55
 static __rte_always_inline void
-idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
+idpf_singleq_vtx(volatile struct idpf_base_tx_desc *txdp,
 	 struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
 {
-	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_FLEX_DATA  |
-			((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S));
+	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA  |
+			((uint64_t)flags  << IDPF_TXD_QW1_CMD_S));
 
 	/* if unaligned on 32-bit boundary, do one to align */
 	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
@@ -1148,19 +1147,19 @@ idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
 		uint64_t hi_qw3 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[3]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw2 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[2]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw1 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[1]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw0 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[0]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 
 		__m512i desc0_3 =
 			_mm512_set_epi64
@@ -1187,11 +1186,11 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
 					 uint16_t nb_pkts)
 {
 	struct idpf_tx_queue *txq = tx_queue;
-	volatile struct idpf_flex_tx_desc *txdp;
+	volatile struct idpf_base_tx_desc *txdp;
 	struct idpf_tx_vec_entry *txep;
 	uint16_t n, nb_commit, tx_id;
-	uint64_t flags = IDPF_TX_FLEX_DESC_CMD_EOP;
-	uint64_t rs = IDPF_TX_FLEX_DESC_CMD_RS | flags;
+	uint64_t flags = IDPF_TX_DESC_CMD_EOP;
+	uint64_t rs = IDPF_TX_DESC_CMD_RS | flags;
 
 	/* cross rx_thresh boundary is not allowed */
 	nb_pkts = RTE_MIN(nb_pkts, txq->rs_thresh);
@@ -1238,9 +1237,9 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
 
 	tx_id = (uint16_t)(tx_id + nb_commit);
 	if (tx_id > txq->next_rs) {
-		txq->tx_ring[txq->next_rs].qw1.cmd_dtype |=
-			rte_cpu_to_le_64(((uint64_t)IDPF_TX_FLEX_DESC_CMD_RS) <<
-					 IDPF_FLEX_TXD_QW1_CMD_S);
+		txq->tx_ring[txq->next_rs].qw1 |=
+			rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
+					 IDPF_TXD_QW1_CMD_S);
 		txq->next_rs =
 			(uint16_t)(txq->next_rs + txq->rs_thresh);
 	}
diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
index 3e3d81ca6d..64f2235580 100644
--- a/drivers/net/idpf/idpf_rxtx.c
+++ b/drivers/net/idpf/idpf_rxtx.c
@@ -74,7 +74,7 @@ idpf_dma_zone_reserve(struct rte_eth_dev *dev, uint16_t queue_idx,
 			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_sched_desc),
 					      IDPF_DMA_MEM_ALIGN);
 		else
-			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_desc),
+			ring_size = RTE_ALIGN(len * sizeof(struct idpf_base_tx_desc),
 					      IDPF_DMA_MEM_ALIGN);
 		rte_memcpy(ring_name, "idpf Tx ring", sizeof("idpf Tx ring"));
 		break;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v3] common/idpf: refactor single queue Tx function
  2023-09-04  7:02 ` [PATCH v2] common/idpf: refactor " Simei Su
@ 2023-09-08 10:28   ` Simei Su
  2023-09-13  5:57     ` Wu, Wenjun1
                       ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Simei Su @ 2023-09-08 10:28 UTC (permalink / raw)
  To: jingjing.wu, beilei.xing, qi.z.zhang; +Cc: dev, wenjun1.wu, Simei Su

This patch replaces flex Tx descriptor with base Tx descriptor to align
with kernel driver practice.

Signed-off-by: Simei Su <simei.su@intel.com>
---
v3:
* Change context TSO descriptor from base mode to flex mode.

v2:
* Refine commit title and commit log.
* Remove redundant definition.
* Modify base mode context TSO descriptor.

 drivers/common/idpf/idpf_common_rxtx.c        | 39 +++++++++----------
 drivers/common/idpf/idpf_common_rxtx.h        |  2 +-
 drivers/common/idpf/idpf_common_rxtx_avx512.c | 37 +++++++++---------
 drivers/net/idpf/idpf_rxtx.c                  |  2 +-
 4 files changed, 39 insertions(+), 41 deletions(-)

diff --git a/drivers/common/idpf/idpf_common_rxtx.c b/drivers/common/idpf/idpf_common_rxtx.c
index fc87e3e243..e6d2486272 100644
--- a/drivers/common/idpf/idpf_common_rxtx.c
+++ b/drivers/common/idpf/idpf_common_rxtx.c
@@ -276,14 +276,14 @@ idpf_qc_single_tx_queue_reset(struct idpf_tx_queue *txq)
 	}
 
 	txe = txq->sw_ring;
-	size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
+	size = sizeof(struct idpf_base_tx_desc) * txq->nb_tx_desc;
 	for (i = 0; i < size; i++)
 		((volatile char *)txq->tx_ring)[i] = 0;
 
 	prev = (uint16_t)(txq->nb_tx_desc - 1);
 	for (i = 0; i < txq->nb_tx_desc; i++) {
-		txq->tx_ring[i].qw1.cmd_dtype =
-			rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
+		txq->tx_ring[i].qw1 =
+			rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
 		txe[i].mbuf =  NULL;
 		txe[i].last_id = i;
 		txe[prev].next_id = i;
@@ -1307,17 +1307,16 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
 	uint16_t nb_tx_to_clean;
 	uint16_t i;
 
-	volatile struct idpf_flex_tx_desc *txd = txq->tx_ring;
+	volatile struct idpf_base_tx_desc *txd = txq->tx_ring;
 
 	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
 	if (desc_to_clean_to >= nb_tx_desc)
 		desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
 
 	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
-	/* In the writeback Tx desccriptor, the only significant fields are the 4-bit DTYPE */
-	if ((txd[desc_to_clean_to].qw1.cmd_dtype &
-	     rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) !=
-	    rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
+	if ((txd[desc_to_clean_to].qw1 &
+	     rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
+	    rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
 		TX_LOG(DEBUG, "TX descriptor %4u is not done "
 		       "(port=%d queue=%d)", desc_to_clean_to,
 		       txq->port_id, txq->queue_id);
@@ -1331,10 +1330,7 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
 		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
 					    last_desc_cleaned);
 
-	txd[desc_to_clean_to].qw1.cmd_dtype = 0;
-	txd[desc_to_clean_to].qw1.buf_size = 0;
-	for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++)
-		txd[desc_to_clean_to].qw1.flex.raw[i] = 0;
+	txd[desc_to_clean_to].qw1 = 0;
 
 	txq->last_desc_cleaned = desc_to_clean_to;
 	txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean);
@@ -1347,8 +1343,8 @@ uint16_t
 idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			  uint16_t nb_pkts)
 {
-	volatile struct idpf_flex_tx_desc *txd;
-	volatile struct idpf_flex_tx_desc *txr;
+	volatile struct idpf_base_tx_desc *txd;
+	volatile struct idpf_base_tx_desc *txr;
 	union idpf_tx_offload tx_offload = {0};
 	struct idpf_tx_entry *txe, *txn;
 	struct idpf_tx_entry *sw_ring;
@@ -1356,6 +1352,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct rte_mbuf *tx_pkt;
 	struct rte_mbuf *m_seg;
 	uint64_t buf_dma_addr;
+	uint32_t td_offset;
 	uint64_t ol_flags;
 	uint16_t tx_last;
 	uint16_t nb_used;
@@ -1382,6 +1379,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
 		td_cmd = 0;
+		td_offset = 0;
 
 		tx_pkt = *tx_pkts++;
 		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
@@ -1462,9 +1460,10 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			slen = m_seg->data_len;
 			buf_dma_addr = rte_mbuf_data_iova(m_seg);
 			txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
-			txd->qw1.buf_size = slen;
-			txd->qw1.cmd_dtype = rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
-							      IDPF_FLEX_TXD_QW1_DTYPE_S);
+			txd->qw1 = rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DATA |
+				((uint64_t)td_cmd  << IDPF_TXD_QW1_CMD_S) |
+				((uint64_t)td_offset << IDPF_TXD_QW1_OFFSET_S) |
+				((uint64_t)slen << IDPF_TXD_QW1_TX_BUF_SZ_S));
 
 			txe->last_id = tx_last;
 			tx_id = txe->next_id;
@@ -1473,7 +1472,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		} while (m_seg);
 
 		/* The last packet data descriptor needs End Of Packet (EOP) */
-		td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP;
+		td_cmd |= IDPF_TX_DESC_CMD_EOP;
 		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
 		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
 
@@ -1482,7 +1481,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			       "%4u (port=%d queue=%d)",
 			       tx_last, txq->port_id, txq->queue_id);
 
-			td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS;
+			td_cmd |= IDPF_TX_DESC_CMD_RS;
 
 			/* Update txq RS bit counters */
 			txq->nb_used = 0;
@@ -1491,7 +1490,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
 			td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
 
-		txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd << IDPF_FLEX_TXD_QW1_CMD_S);
+		txd->qw1 |= rte_cpu_to_le_16(td_cmd << IDPF_TXD_QW1_CMD_S);
 	}
 
 end_of_tx:
diff --git a/drivers/common/idpf/idpf_common_rxtx.h b/drivers/common/idpf/idpf_common_rxtx.h
index 6cb83fc0a6..b49b1ed737 100644
--- a/drivers/common/idpf/idpf_common_rxtx.h
+++ b/drivers/common/idpf/idpf_common_rxtx.h
@@ -157,7 +157,7 @@ struct idpf_tx_entry {
 /* Structure associated with each TX queue. */
 struct idpf_tx_queue {
 	const struct rte_memzone *mz;		/* memzone for Tx ring */
-	volatile struct idpf_flex_tx_desc *tx_ring;	/* Tx ring virtual address */
+	volatile struct idpf_base_tx_desc *tx_ring;	/* Tx ring virtual address */
 	volatile union {
 		struct idpf_flex_tx_sched_desc *desc_ring;
 		struct idpf_splitq_tx_compl_desc *compl_ring;
diff --git a/drivers/common/idpf/idpf_common_rxtx_avx512.c b/drivers/common/idpf/idpf_common_rxtx_avx512.c
index 81312617cc..afb0014a13 100644
--- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
@@ -1005,7 +1005,7 @@ idpf_tx_singleq_free_bufs_avx512(struct idpf_tx_queue *txq)
 	struct rte_mbuf *m, *free[txq->rs_thresh];
 
 	/* check DD bits on threshold descriptor */
-	if ((txq->tx_ring[txq->next_dd].qw1.cmd_dtype &
+	if ((txq->tx_ring[txq->next_dd].qw1 &
 			rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
 			rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE))
 		return 0;
@@ -1113,15 +1113,14 @@ tx_backlog_entry_avx512(struct idpf_tx_vec_entry *txep,
 		txep[i].mbuf = tx_pkts[i];
 }
 
-#define IDPF_FLEX_TXD_QW1_BUF_SZ_S 48
 static __rte_always_inline void
-idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
+idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,
 	  struct rte_mbuf *pkt, uint64_t flags)
 {
 	uint64_t high_qw =
-		(IDPF_TX_DESC_DTYPE_FLEX_DATA << IDPF_FLEX_TXD_QW1_DTYPE_S |
-		 ((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S) |
-		 ((uint64_t)pkt->data_len << IDPF_FLEX_TXD_QW1_BUF_SZ_S));
+		(IDPF_TX_DESC_DTYPE_DATA |
+		 ((uint64_t)flags  << IDPF_TXD_QW1_CMD_S) |
+		 ((uint64_t)pkt->data_len << IDPF_TXD_QW1_TX_BUF_SZ_S));
 
 	__m128i descriptor = _mm_set_epi64x(high_qw,
 					    pkt->buf_iova + pkt->data_off);
@@ -1131,11 +1130,11 @@ idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
 #define IDPF_TX_LEN_MASK 0xAA
 #define IDPF_TX_OFF_MASK 0x55
 static __rte_always_inline void
-idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
+idpf_singleq_vtx(volatile struct idpf_base_tx_desc *txdp,
 	 struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
 {
-	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_FLEX_DATA  |
-			((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S));
+	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA  |
+			((uint64_t)flags  << IDPF_TXD_QW1_CMD_S));
 
 	/* if unaligned on 32-bit boundary, do one to align */
 	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
@@ -1148,19 +1147,19 @@ idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
 		uint64_t hi_qw3 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[3]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw2 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[2]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw1 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[1]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw0 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[0]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 
 		__m512i desc0_3 =
 			_mm512_set_epi64
@@ -1187,11 +1186,11 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
 					 uint16_t nb_pkts)
 {
 	struct idpf_tx_queue *txq = tx_queue;
-	volatile struct idpf_flex_tx_desc *txdp;
+	volatile struct idpf_base_tx_desc *txdp;
 	struct idpf_tx_vec_entry *txep;
 	uint16_t n, nb_commit, tx_id;
-	uint64_t flags = IDPF_TX_FLEX_DESC_CMD_EOP;
-	uint64_t rs = IDPF_TX_FLEX_DESC_CMD_RS | flags;
+	uint64_t flags = IDPF_TX_DESC_CMD_EOP;
+	uint64_t rs = IDPF_TX_DESC_CMD_RS | flags;
 
 	/* cross rx_thresh boundary is not allowed */
 	nb_pkts = RTE_MIN(nb_pkts, txq->rs_thresh);
@@ -1238,9 +1237,9 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
 
 	tx_id = (uint16_t)(tx_id + nb_commit);
 	if (tx_id > txq->next_rs) {
-		txq->tx_ring[txq->next_rs].qw1.cmd_dtype |=
-			rte_cpu_to_le_64(((uint64_t)IDPF_TX_FLEX_DESC_CMD_RS) <<
-					 IDPF_FLEX_TXD_QW1_CMD_S);
+		txq->tx_ring[txq->next_rs].qw1 |=
+			rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
+					 IDPF_TXD_QW1_CMD_S);
 		txq->next_rs =
 			(uint16_t)(txq->next_rs + txq->rs_thresh);
 	}
diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
index 3e3d81ca6d..64f2235580 100644
--- a/drivers/net/idpf/idpf_rxtx.c
+++ b/drivers/net/idpf/idpf_rxtx.c
@@ -74,7 +74,7 @@ idpf_dma_zone_reserve(struct rte_eth_dev *dev, uint16_t queue_idx,
 			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_sched_desc),
 					      IDPF_DMA_MEM_ALIGN);
 		else
-			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_desc),
+			ring_size = RTE_ALIGN(len * sizeof(struct idpf_base_tx_desc),
 					      IDPF_DMA_MEM_ALIGN);
 		rte_memcpy(ring_name, "idpf Tx ring", sizeof("idpf Tx ring"));
 		break;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v3] common/idpf: refactor single queue Tx function
  2023-09-08 10:28   ` [PATCH v3] " Simei Su
@ 2023-09-13  5:57     ` Wu, Wenjun1
  2023-09-13  7:45       ` Zhang, Qi Z
  2023-09-13  6:07     ` Xing, Beilei
  2023-09-14  1:50     ` [PATCH v4 0/3] refactor single queue Tx data path Simei Su
  2 siblings, 1 reply; 16+ messages in thread
From: Wu, Wenjun1 @ 2023-09-13  5:57 UTC (permalink / raw)
  To: Su, Simei, Wu, Jingjing, Xing, Beilei, Zhang, Qi Z; +Cc: dev



> -----Original Message-----
> From: Su, Simei <simei.su@intel.com>
> Sent: Friday, September 8, 2023 6:28 PM
> To: Wu, Jingjing <jingjing.wu@intel.com>; Xing, Beilei <beilei.xing@intel.com>;
> Zhang, Qi Z <qi.z.zhang@intel.com>
> Cc: dev@dpdk.org; Wu, Wenjun1 <wenjun1.wu@intel.com>; Su, Simei
> <simei.su@intel.com>
> Subject: [PATCH v3] common/idpf: refactor single queue Tx function
> 
> This patch replaces flex Tx descriptor with base Tx descriptor to align with
> kernel driver practice.
> 
> Signed-off-by: Simei Su <simei.su@intel.com>
> ---
> v3:
> * Change context TSO descriptor from base mode to flex mode.
> 
> v2:
> * Refine commit title and commit log.
> * Remove redundant definition.
> * Modify base mode context TSO descriptor.
> 
>  drivers/common/idpf/idpf_common_rxtx.c        | 39 +++++++++----------
>  drivers/common/idpf/idpf_common_rxtx.h        |  2 +-
>  drivers/common/idpf/idpf_common_rxtx_avx512.c | 37 +++++++++---------
>  drivers/net/idpf/idpf_rxtx.c                  |  2 +-
>  4 files changed, 39 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/common/idpf/idpf_common_rxtx.c
> b/drivers/common/idpf/idpf_common_rxtx.c
> index fc87e3e243..e6d2486272 100644
> --- a/drivers/common/idpf/idpf_common_rxtx.c
> +++ b/drivers/common/idpf/idpf_common_rxtx.c
> @@ -276,14 +276,14 @@ idpf_qc_single_tx_queue_reset(struct
> idpf_tx_queue *txq)
>  	}
> 
>  	txe = txq->sw_ring;
> -	size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
> +	size = sizeof(struct idpf_base_tx_desc) * txq->nb_tx_desc;
>  	for (i = 0; i < size; i++)
>  		((volatile char *)txq->tx_ring)[i] = 0;
> 
>  	prev = (uint16_t)(txq->nb_tx_desc - 1);
>  	for (i = 0; i < txq->nb_tx_desc; i++) {
> -		txq->tx_ring[i].qw1.cmd_dtype =
> -
> 	rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
> +		txq->tx_ring[i].qw1 =
> +
> 	rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
>  		txe[i].mbuf =  NULL;
>  		txe[i].last_id = i;
>  		txe[prev].next_id = i;
> @@ -1307,17 +1307,16 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
>  	uint16_t nb_tx_to_clean;
>  	uint16_t i;
> 
> -	volatile struct idpf_flex_tx_desc *txd = txq->tx_ring;
> +	volatile struct idpf_base_tx_desc *txd = txq->tx_ring;
> 
>  	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
>  	if (desc_to_clean_to >= nb_tx_desc)
>  		desc_to_clean_to = (uint16_t)(desc_to_clean_to -
> nb_tx_desc);
> 
>  	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
> -	/* In the writeback Tx desccriptor, the only significant fields are the 4-
> bit DTYPE */
> -	if ((txd[desc_to_clean_to].qw1.cmd_dtype &
> -	     rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) !=
> -	    rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
> +	if ((txd[desc_to_clean_to].qw1 &
> +	     rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
> +	    rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
>  		TX_LOG(DEBUG, "TX descriptor %4u is not done "
>  		       "(port=%d queue=%d)", desc_to_clean_to,
>  		       txq->port_id, txq->queue_id);
> @@ -1331,10 +1330,7 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
>  		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
>  					    last_desc_cleaned);
> 
> -	txd[desc_to_clean_to].qw1.cmd_dtype = 0;
> -	txd[desc_to_clean_to].qw1.buf_size = 0;
> -	for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++)
> -		txd[desc_to_clean_to].qw1.flex.raw[i] = 0;
> +	txd[desc_to_clean_to].qw1 = 0;
> 
>  	txq->last_desc_cleaned = desc_to_clean_to;
>  	txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean); @@ -
> 1347,8 +1343,8 @@ uint16_t  idpf_dp_singleq_xmit_pkts(void *tx_queue,
> struct rte_mbuf **tx_pkts,
>  			  uint16_t nb_pkts)
>  {
> -	volatile struct idpf_flex_tx_desc *txd;
> -	volatile struct idpf_flex_tx_desc *txr;
> +	volatile struct idpf_base_tx_desc *txd;
> +	volatile struct idpf_base_tx_desc *txr;
>  	union idpf_tx_offload tx_offload = {0};
>  	struct idpf_tx_entry *txe, *txn;
>  	struct idpf_tx_entry *sw_ring;
> @@ -1356,6 +1352,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct
> rte_mbuf **tx_pkts,
>  	struct rte_mbuf *tx_pkt;
>  	struct rte_mbuf *m_seg;
>  	uint64_t buf_dma_addr;
> +	uint32_t td_offset;
>  	uint64_t ol_flags;
>  	uint16_t tx_last;
>  	uint16_t nb_used;
> @@ -1382,6 +1379,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct
> rte_mbuf **tx_pkts,
> 
>  	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
>  		td_cmd = 0;
> +		td_offset = 0;
> 
>  		tx_pkt = *tx_pkts++;
>  		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
> @@ -1462,9 +1460,10 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct
> rte_mbuf **tx_pkts,
>  			slen = m_seg->data_len;
>  			buf_dma_addr = rte_mbuf_data_iova(m_seg);
>  			txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
> -			txd->qw1.buf_size = slen;
> -			txd->qw1.cmd_dtype =
> rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
> -
> IDPF_FLEX_TXD_QW1_DTYPE_S);
> +			txd->qw1 =
> rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DATA |
> +				((uint64_t)td_cmd  <<
> IDPF_TXD_QW1_CMD_S) |
> +				((uint64_t)td_offset <<
> IDPF_TXD_QW1_OFFSET_S) |
> +				((uint64_t)slen <<
> IDPF_TXD_QW1_TX_BUF_SZ_S));
> 
>  			txe->last_id = tx_last;
>  			tx_id = txe->next_id;
> @@ -1473,7 +1472,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct
> rte_mbuf **tx_pkts,
>  		} while (m_seg);
> 
>  		/* The last packet data descriptor needs End Of Packet (EOP)
> */
> -		td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP;
> +		td_cmd |= IDPF_TX_DESC_CMD_EOP;
>  		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
>  		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
> 
> @@ -1482,7 +1481,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct
> rte_mbuf **tx_pkts,
>  			       "%4u (port=%d queue=%d)",
>  			       tx_last, txq->port_id, txq->queue_id);
> 
> -			td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS;
> +			td_cmd |= IDPF_TX_DESC_CMD_RS;
> 
>  			/* Update txq RS bit counters */
>  			txq->nb_used = 0;
> @@ -1491,7 +1490,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct
> rte_mbuf **tx_pkts,
>  		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
>  			td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
> 
> -		txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd <<
> IDPF_FLEX_TXD_QW1_CMD_S);
> +		txd->qw1 |= rte_cpu_to_le_16(td_cmd <<
> IDPF_TXD_QW1_CMD_S);
>  	}
> 
>  end_of_tx:
> diff --git a/drivers/common/idpf/idpf_common_rxtx.h
> b/drivers/common/idpf/idpf_common_rxtx.h
> index 6cb83fc0a6..b49b1ed737 100644
> --- a/drivers/common/idpf/idpf_common_rxtx.h
> +++ b/drivers/common/idpf/idpf_common_rxtx.h
> @@ -157,7 +157,7 @@ struct idpf_tx_entry {
>  /* Structure associated with each TX queue. */  struct idpf_tx_queue {
>  	const struct rte_memzone *mz;		/* memzone for Tx ring */
> -	volatile struct idpf_flex_tx_desc *tx_ring;	/* Tx ring virtual
> address */
> +	volatile struct idpf_base_tx_desc *tx_ring;	/* Tx ring virtual
> address */
>  	volatile union {
>  		struct idpf_flex_tx_sched_desc *desc_ring;
>  		struct idpf_splitq_tx_compl_desc *compl_ring; diff --git
> a/drivers/common/idpf/idpf_common_rxtx_avx512.c
> b/drivers/common/idpf/idpf_common_rxtx_avx512.c
> index 81312617cc..afb0014a13 100644
> --- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
> +++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
> @@ -1005,7 +1005,7 @@ idpf_tx_singleq_free_bufs_avx512(struct
> idpf_tx_queue *txq)
>  	struct rte_mbuf *m, *free[txq->rs_thresh];
> 
>  	/* check DD bits on threshold descriptor */
> -	if ((txq->tx_ring[txq->next_dd].qw1.cmd_dtype &
> +	if ((txq->tx_ring[txq->next_dd].qw1 &
>  			rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
> 
> 	rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE))
>  		return 0;
> @@ -1113,15 +1113,14 @@ tx_backlog_entry_avx512(struct
> idpf_tx_vec_entry *txep,
>  		txep[i].mbuf = tx_pkts[i];
>  }
> 
> -#define IDPF_FLEX_TXD_QW1_BUF_SZ_S 48
>  static __rte_always_inline void
> -idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
> +idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,
>  	  struct rte_mbuf *pkt, uint64_t flags)  {
>  	uint64_t high_qw =
> -		(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
> IDPF_FLEX_TXD_QW1_DTYPE_S |
> -		 ((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S) |
> -		 ((uint64_t)pkt->data_len <<
> IDPF_FLEX_TXD_QW1_BUF_SZ_S));
> +		(IDPF_TX_DESC_DTYPE_DATA |
> +		 ((uint64_t)flags  << IDPF_TXD_QW1_CMD_S) |
> +		 ((uint64_t)pkt->data_len << IDPF_TXD_QW1_TX_BUF_SZ_S));
> 
>  	__m128i descriptor = _mm_set_epi64x(high_qw,
>  					    pkt->buf_iova + pkt->data_off);
> @@ -1131,11 +1130,11 @@ idpf_singleq_vtx1(volatile struct
> idpf_flex_tx_desc *txdp,  #define IDPF_TX_LEN_MASK 0xAA  #define
> IDPF_TX_OFF_MASK 0x55  static __rte_always_inline void -
> idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
> +idpf_singleq_vtx(volatile struct idpf_base_tx_desc *txdp,
>  	 struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)  {
> -	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_FLEX_DATA  |
> -			((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S));
> +	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA  |
> +			((uint64_t)flags  << IDPF_TXD_QW1_CMD_S));
> 
>  	/* if unaligned on 32-bit boundary, do one to align */
>  	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) { @@ -1148,19
> +1147,19 @@ idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
>  		uint64_t hi_qw3 =
>  			hi_qw_tmpl |
>  			((uint64_t)pkt[3]->data_len <<
> -			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> +			 IDPF_TXD_QW1_TX_BUF_SZ_S);
>  		uint64_t hi_qw2 =
>  			hi_qw_tmpl |
>  			((uint64_t)pkt[2]->data_len <<
> -			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> +			 IDPF_TXD_QW1_TX_BUF_SZ_S);
>  		uint64_t hi_qw1 =
>  			hi_qw_tmpl |
>  			((uint64_t)pkt[1]->data_len <<
> -			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> +			 IDPF_TXD_QW1_TX_BUF_SZ_S);
>  		uint64_t hi_qw0 =
>  			hi_qw_tmpl |
>  			((uint64_t)pkt[0]->data_len <<
> -			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> +			 IDPF_TXD_QW1_TX_BUF_SZ_S);
> 
>  		__m512i desc0_3 =
>  			_mm512_set_epi64
> @@ -1187,11 +1186,11 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void
> *tx_queue, struct rte_mbuf **tx_pk
>  					 uint16_t nb_pkts)
>  {
>  	struct idpf_tx_queue *txq = tx_queue;
> -	volatile struct idpf_flex_tx_desc *txdp;
> +	volatile struct idpf_base_tx_desc *txdp;
>  	struct idpf_tx_vec_entry *txep;
>  	uint16_t n, nb_commit, tx_id;
> -	uint64_t flags = IDPF_TX_FLEX_DESC_CMD_EOP;
> -	uint64_t rs = IDPF_TX_FLEX_DESC_CMD_RS | flags;
> +	uint64_t flags = IDPF_TX_DESC_CMD_EOP;
> +	uint64_t rs = IDPF_TX_DESC_CMD_RS | flags;
> 
>  	/* cross rx_thresh boundary is not allowed */
>  	nb_pkts = RTE_MIN(nb_pkts, txq->rs_thresh); @@ -1238,9 +1237,9
> @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct
> rte_mbuf **tx_pk
> 
>  	tx_id = (uint16_t)(tx_id + nb_commit);
>  	if (tx_id > txq->next_rs) {
> -		txq->tx_ring[txq->next_rs].qw1.cmd_dtype |=
> -
> 	rte_cpu_to_le_64(((uint64_t)IDPF_TX_FLEX_DESC_CMD_RS) <<
> -					 IDPF_FLEX_TXD_QW1_CMD_S);
> +		txq->tx_ring[txq->next_rs].qw1 |=
> +
> 	rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
> +					 IDPF_TXD_QW1_CMD_S);
>  		txq->next_rs =
>  			(uint16_t)(txq->next_rs + txq->rs_thresh);
>  	}
> diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c index
> 3e3d81ca6d..64f2235580 100644
> --- a/drivers/net/idpf/idpf_rxtx.c
> +++ b/drivers/net/idpf/idpf_rxtx.c
> @@ -74,7 +74,7 @@ idpf_dma_zone_reserve(struct rte_eth_dev *dev,
> uint16_t queue_idx,
>  			ring_size = RTE_ALIGN(len * sizeof(struct
> idpf_flex_tx_sched_desc),
>  					      IDPF_DMA_MEM_ALIGN);
>  		else
> -			ring_size = RTE_ALIGN(len * sizeof(struct
> idpf_flex_tx_desc),
> +			ring_size = RTE_ALIGN(len * sizeof(struct
> idpf_base_tx_desc),
>  					      IDPF_DMA_MEM_ALIGN);
>  		rte_memcpy(ring_name, "idpf Tx ring", sizeof("idpf Tx ring"));
>  		break;
> --
> 2.25.1

Acked-by: Wenjun Wu <wenjun1.wu@intel.com>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v3] common/idpf: refactor single queue Tx function
  2023-09-08 10:28   ` [PATCH v3] " Simei Su
  2023-09-13  5:57     ` Wu, Wenjun1
@ 2023-09-13  6:07     ` Xing, Beilei
  2023-09-14  1:50     ` [PATCH v4 0/3] refactor single queue Tx data path Simei Su
  2 siblings, 0 replies; 16+ messages in thread
From: Xing, Beilei @ 2023-09-13  6:07 UTC (permalink / raw)
  To: Su, Simei, Wu, Jingjing, Zhang, Qi Z; +Cc: dev, Wu, Wenjun1



> -----Original Message-----
> From: Su, Simei <simei.su@intel.com>
> Sent: Friday, September 8, 2023 6:28 PM
> To: Wu, Jingjing <jingjing.wu@intel.com>; Xing, Beilei <beilei.xing@intel.com>;
> Zhang, Qi Z <qi.z.zhang@intel.com>
> Cc: dev@dpdk.org; Wu, Wenjun1 <wenjun1.wu@intel.com>; Su, Simei
> <simei.su@intel.com>
> Subject: [PATCH v3] common/idpf: refactor single queue Tx function
> 
> This patch replaces flex Tx descriptor with base Tx descriptor to align with kernel
> driver practice.
> 
> Signed-off-by: Simei Su <simei.su@intel.com>
> ---
> v3:
> * Change context TSO descriptor from base mode to flex mode.
> 
> v2:
> * Refine commit title and commit log.
> * Remove redundant definition.
> * Modify base mode context TSO descriptor.
> 
>  drivers/common/idpf/idpf_common_rxtx.c        | 39 +++++++++----------
>  drivers/common/idpf/idpf_common_rxtx.h        |  2 +-
>  drivers/common/idpf/idpf_common_rxtx_avx512.c | 37 +++++++++---------
>  drivers/net/idpf/idpf_rxtx.c                  |  2 +-
>  4 files changed, 39 insertions(+), 41 deletions(-)
> 


> diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c index
> 3e3d81ca6d..64f2235580 100644
> --- a/drivers/net/idpf/idpf_rxtx.c
> +++ b/drivers/net/idpf/idpf_rxtx.c
> @@ -74,7 +74,7 @@ idpf_dma_zone_reserve(struct rte_eth_dev *dev, uint16_t
> queue_idx,
>  			ring_size = RTE_ALIGN(len * sizeof(struct
> idpf_flex_tx_sched_desc),
>  					      IDPF_DMA_MEM_ALIGN);
>  		else
> -			ring_size = RTE_ALIGN(len * sizeof(struct
> idpf_flex_tx_desc),
> +			ring_size = RTE_ALIGN(len * sizeof(struct
> idpf_base_tx_desc),

Please check whether idpf_flex_tx_desc is used in the cpfl PMD.

>  					      IDPF_DMA_MEM_ALIGN);
>  		rte_memcpy(ring_name, "idpf Tx ring", sizeof("idpf Tx ring"));
>  		break;
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v3] common/idpf: refactor single queue Tx function
  2023-09-13  5:57     ` Wu, Wenjun1
@ 2023-09-13  7:45       ` Zhang, Qi Z
  2023-09-14  1:47         ` Zhang, Qi Z
  0 siblings, 1 reply; 16+ messages in thread
From: Zhang, Qi Z @ 2023-09-13  7:45 UTC (permalink / raw)
  To: Wu, Wenjun1, Su, Simei, Wu, Jingjing, Xing, Beilei; +Cc: dev



> -----Original Message-----
> From: Wu, Wenjun1 <wenjun1.wu@intel.com>
> Sent: Wednesday, September 13, 2023 1:57 PM
> To: Su, Simei <simei.su@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>;
> Xing, Beilei <beilei.xing@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>
> Cc: dev@dpdk.org
> Subject: RE: [PATCH v3] common/idpf: refactor single queue Tx function
> 
> 
> 
> > -----Original Message-----
> > From: Su, Simei <simei.su@intel.com>
> > Sent: Friday, September 8, 2023 6:28 PM
> > To: Wu, Jingjing <jingjing.wu@intel.com>; Xing, Beilei
> > <beilei.xing@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>
> > Cc: dev@dpdk.org; Wu, Wenjun1 <wenjun1.wu@intel.com>; Su, Simei
> > <simei.su@intel.com>
> > Subject: [PATCH v3] common/idpf: refactor single queue Tx function
> >
> > This patch replaces flex Tx descriptor with base Tx descriptor to
> > align with kernel driver practice.
> >
> > Signed-off-by: Simei Su <simei.su@intel.com>
> > ---
> > v3:
> > * Change context TSO descriptor from base mode to flex mode.
> >
> > v2:
> > * Refine commit title and commit log.
> > * Remove redundant definition.
> > * Modify base mode context TSO descriptor.
> >
> >  drivers/common/idpf/idpf_common_rxtx.c        | 39 +++++++++----------
> >  drivers/common/idpf/idpf_common_rxtx.h        |  2 +-
> >  drivers/common/idpf/idpf_common_rxtx_avx512.c | 37 +++++++++---------
> >  drivers/net/idpf/idpf_rxtx.c                  |  2 +-
> >  4 files changed, 39 insertions(+), 41 deletions(-)
> >
> > diff --git a/drivers/common/idpf/idpf_common_rxtx.c
> > b/drivers/common/idpf/idpf_common_rxtx.c
> > index fc87e3e243..e6d2486272 100644
> > --- a/drivers/common/idpf/idpf_common_rxtx.c
> > +++ b/drivers/common/idpf/idpf_common_rxtx.c
> > @@ -276,14 +276,14 @@ idpf_qc_single_tx_queue_reset(struct
> > idpf_tx_queue *txq)
> >  	}
> >
> >  	txe = txq->sw_ring;
> > -	size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
> > +	size = sizeof(struct idpf_base_tx_desc) * txq->nb_tx_desc;
> >  	for (i = 0; i < size; i++)
> >  		((volatile char *)txq->tx_ring)[i] = 0;
> >
> >  	prev = (uint16_t)(txq->nb_tx_desc - 1);
> >  	for (i = 0; i < txq->nb_tx_desc; i++) {
> > -		txq->tx_ring[i].qw1.cmd_dtype =
> > -
> > 	rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
> > +		txq->tx_ring[i].qw1 =
> > +
> > 	rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
> >  		txe[i].mbuf =  NULL;
> >  		txe[i].last_id = i;
> >  		txe[prev].next_id = i;
> > @@ -1307,17 +1307,16 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
> >  	uint16_t nb_tx_to_clean;
> >  	uint16_t i;
> >
> > -	volatile struct idpf_flex_tx_desc *txd = txq->tx_ring;
> > +	volatile struct idpf_base_tx_desc *txd = txq->tx_ring;
> >
> >  	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
> >  	if (desc_to_clean_to >= nb_tx_desc)
> >  		desc_to_clean_to = (uint16_t)(desc_to_clean_to -
> nb_tx_desc);
> >
> >  	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
> > -	/* In the writeback Tx desccriptor, the only significant fields are the 4-
> > bit DTYPE */
> > -	if ((txd[desc_to_clean_to].qw1.cmd_dtype &
> > -	     rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) !=
> > -	    rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
> > +	if ((txd[desc_to_clean_to].qw1 &
> > +	     rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
> > +	    rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
> >  		TX_LOG(DEBUG, "TX descriptor %4u is not done "
> >  		       "(port=%d queue=%d)", desc_to_clean_to,
> >  		       txq->port_id, txq->queue_id); @@ -1331,10 +1330,7 @@
> > idpf_xmit_cleanup(struct idpf_tx_queue *txq)
> >  		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
> >  					    last_desc_cleaned);
> >
> > -	txd[desc_to_clean_to].qw1.cmd_dtype = 0;
> > -	txd[desc_to_clean_to].qw1.buf_size = 0;
> > -	for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++)
> > -		txd[desc_to_clean_to].qw1.flex.raw[i] = 0;
> > +	txd[desc_to_clean_to].qw1 = 0;
> >
> >  	txq->last_desc_cleaned = desc_to_clean_to;
> >  	txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean); @@ -
> > 1347,8 +1343,8 @@ uint16_t  idpf_dp_singleq_xmit_pkts(void *tx_queue,
> > struct rte_mbuf **tx_pkts,
> >  			  uint16_t nb_pkts)
> >  {
> > -	volatile struct idpf_flex_tx_desc *txd;
> > -	volatile struct idpf_flex_tx_desc *txr;
> > +	volatile struct idpf_base_tx_desc *txd;
> > +	volatile struct idpf_base_tx_desc *txr;
> >  	union idpf_tx_offload tx_offload = {0};
> >  	struct idpf_tx_entry *txe, *txn;
> >  	struct idpf_tx_entry *sw_ring;
> > @@ -1356,6 +1352,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue,
> struct
> > rte_mbuf **tx_pkts,
> >  	struct rte_mbuf *tx_pkt;
> >  	struct rte_mbuf *m_seg;
> >  	uint64_t buf_dma_addr;
> > +	uint32_t td_offset;
> >  	uint64_t ol_flags;
> >  	uint16_t tx_last;
> >  	uint16_t nb_used;
> > @@ -1382,6 +1379,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue,
> struct
> > rte_mbuf **tx_pkts,
> >
> >  	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
> >  		td_cmd = 0;
> > +		td_offset = 0;
> >
> >  		tx_pkt = *tx_pkts++;
> >  		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
> > @@ -1462,9 +1460,10 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue,
> > struct rte_mbuf **tx_pkts,
> >  			slen = m_seg->data_len;
> >  			buf_dma_addr = rte_mbuf_data_iova(m_seg);
> >  			txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
> > -			txd->qw1.buf_size = slen;
> > -			txd->qw1.cmd_dtype =
> > rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
> > -
> > IDPF_FLEX_TXD_QW1_DTYPE_S);
> > +			txd->qw1 =
> > rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DATA |
> > +				((uint64_t)td_cmd  <<
> > IDPF_TXD_QW1_CMD_S) |
> > +				((uint64_t)td_offset <<
> > IDPF_TXD_QW1_OFFSET_S) |
> > +				((uint64_t)slen <<
> > IDPF_TXD_QW1_TX_BUF_SZ_S));
> >
> >  			txe->last_id = tx_last;
> >  			tx_id = txe->next_id;
> > @@ -1473,7 +1472,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue,
> struct
> > rte_mbuf **tx_pkts,
> >  		} while (m_seg);
> >
> >  		/* The last packet data descriptor needs End Of Packet (EOP)
> */
> > -		td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP;
> > +		td_cmd |= IDPF_TX_DESC_CMD_EOP;
> >  		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
> >  		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
> >
> > @@ -1482,7 +1481,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue,
> struct
> > rte_mbuf **tx_pkts,
> >  			       "%4u (port=%d queue=%d)",
> >  			       tx_last, txq->port_id, txq->queue_id);
> >
> > -			td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS;
> > +			td_cmd |= IDPF_TX_DESC_CMD_RS;
> >
> >  			/* Update txq RS bit counters */
> >  			txq->nb_used = 0;
> > @@ -1491,7 +1490,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue,
> struct
> > rte_mbuf **tx_pkts,
> >  		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
> >  			td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
> >
> > -		txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd <<
> > IDPF_FLEX_TXD_QW1_CMD_S);
> > +		txd->qw1 |= rte_cpu_to_le_16(td_cmd <<
> > IDPF_TXD_QW1_CMD_S);
> >  	}
> >
> >  end_of_tx:
> > diff --git a/drivers/common/idpf/idpf_common_rxtx.h
> > b/drivers/common/idpf/idpf_common_rxtx.h
> > index 6cb83fc0a6..b49b1ed737 100644
> > --- a/drivers/common/idpf/idpf_common_rxtx.h
> > +++ b/drivers/common/idpf/idpf_common_rxtx.h
> > @@ -157,7 +157,7 @@ struct idpf_tx_entry {
> >  /* Structure associated with each TX queue. */  struct idpf_tx_queue {
> >  	const struct rte_memzone *mz;		/* memzone for Tx ring */
> > -	volatile struct idpf_flex_tx_desc *tx_ring;	/* Tx ring virtual
> > address */
> > +	volatile struct idpf_base_tx_desc *tx_ring;	/* Tx ring virtual
> > address */
> >  	volatile union {
> >  		struct idpf_flex_tx_sched_desc *desc_ring;
> >  		struct idpf_splitq_tx_compl_desc *compl_ring; diff --git
> > a/drivers/common/idpf/idpf_common_rxtx_avx512.c
> > b/drivers/common/idpf/idpf_common_rxtx_avx512.c
> > index 81312617cc..afb0014a13 100644
> > --- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
> > +++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
> > @@ -1005,7 +1005,7 @@ idpf_tx_singleq_free_bufs_avx512(struct
> > idpf_tx_queue *txq)
> >  	struct rte_mbuf *m, *free[txq->rs_thresh];
> >
> >  	/* check DD bits on threshold descriptor */
> > -	if ((txq->tx_ring[txq->next_dd].qw1.cmd_dtype &
> > +	if ((txq->tx_ring[txq->next_dd].qw1 &
> >  			rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
> >
> > 	rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE))
> >  		return 0;
> > @@ -1113,15 +1113,14 @@ tx_backlog_entry_avx512(struct
> > idpf_tx_vec_entry *txep,
> >  		txep[i].mbuf = tx_pkts[i];
> >  }
> >
> > -#define IDPF_FLEX_TXD_QW1_BUF_SZ_S 48  static __rte_always_inline
> > void -idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
> > +idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,
> >  	  struct rte_mbuf *pkt, uint64_t flags)  {
> >  	uint64_t high_qw =
> > -		(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
> > IDPF_FLEX_TXD_QW1_DTYPE_S |
> > -		 ((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S) |
> > -		 ((uint64_t)pkt->data_len <<
> > IDPF_FLEX_TXD_QW1_BUF_SZ_S));
> > +		(IDPF_TX_DESC_DTYPE_DATA |
> > +		 ((uint64_t)flags  << IDPF_TXD_QW1_CMD_S) |
> > +		 ((uint64_t)pkt->data_len << IDPF_TXD_QW1_TX_BUF_SZ_S));
> >
> >  	__m128i descriptor = _mm_set_epi64x(high_qw,
> >  					    pkt->buf_iova + pkt->data_off);
> @@ -1131,11 +1130,11 @@
> > idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,  #define
> > IDPF_TX_LEN_MASK 0xAA  #define IDPF_TX_OFF_MASK 0x55  static
> > __rte_always_inline void - idpf_singleq_vtx(volatile struct
> > idpf_flex_tx_desc *txdp,
> > +idpf_singleq_vtx(volatile struct idpf_base_tx_desc *txdp,
> >  	 struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)  {
> > -	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_FLEX_DATA  |
> > -			((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S));
> > +	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA  |
> > +			((uint64_t)flags  << IDPF_TXD_QW1_CMD_S));
> >
> >  	/* if unaligned on 32-bit boundary, do one to align */
> >  	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) { @@ -1148,19
> > +1147,19 @@ idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
> >  		uint64_t hi_qw3 =
> >  			hi_qw_tmpl |
> >  			((uint64_t)pkt[3]->data_len <<
> > -			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> > +			 IDPF_TXD_QW1_TX_BUF_SZ_S);
> >  		uint64_t hi_qw2 =
> >  			hi_qw_tmpl |
> >  			((uint64_t)pkt[2]->data_len <<
> > -			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> > +			 IDPF_TXD_QW1_TX_BUF_SZ_S);
> >  		uint64_t hi_qw1 =
> >  			hi_qw_tmpl |
> >  			((uint64_t)pkt[1]->data_len <<
> > -			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> > +			 IDPF_TXD_QW1_TX_BUF_SZ_S);
> >  		uint64_t hi_qw0 =
> >  			hi_qw_tmpl |
> >  			((uint64_t)pkt[0]->data_len <<
> > -			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> > +			 IDPF_TXD_QW1_TX_BUF_SZ_S);
> >
> >  		__m512i desc0_3 =
> >  			_mm512_set_epi64
> > @@ -1187,11 +1186,11 @@
> idpf_singleq_xmit_fixed_burst_vec_avx512(void
> > *tx_queue, struct rte_mbuf **tx_pk
> >  					 uint16_t nb_pkts)
> >  {
> >  	struct idpf_tx_queue *txq = tx_queue;
> > -	volatile struct idpf_flex_tx_desc *txdp;
> > +	volatile struct idpf_base_tx_desc *txdp;
> >  	struct idpf_tx_vec_entry *txep;
> >  	uint16_t n, nb_commit, tx_id;
> > -	uint64_t flags = IDPF_TX_FLEX_DESC_CMD_EOP;
> > -	uint64_t rs = IDPF_TX_FLEX_DESC_CMD_RS | flags;
> > +	uint64_t flags = IDPF_TX_DESC_CMD_EOP;
> > +	uint64_t rs = IDPF_TX_DESC_CMD_RS | flags;
> >
> >  	/* cross rx_thresh boundary is not allowed */
> >  	nb_pkts = RTE_MIN(nb_pkts, txq->rs_thresh); @@ -1238,9 +1237,9
> @@
> > idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct
> > rte_mbuf **tx_pk
> >
> >  	tx_id = (uint16_t)(tx_id + nb_commit);
> >  	if (tx_id > txq->next_rs) {
> > -		txq->tx_ring[txq->next_rs].qw1.cmd_dtype |=
> > -
> > 	rte_cpu_to_le_64(((uint64_t)IDPF_TX_FLEX_DESC_CMD_RS) <<
> > -					 IDPF_FLEX_TXD_QW1_CMD_S);
> > +		txq->tx_ring[txq->next_rs].qw1 |=
> > +
> > 	rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
> > +					 IDPF_TXD_QW1_CMD_S);
> >  		txq->next_rs =
> >  			(uint16_t)(txq->next_rs + txq->rs_thresh);
> >  	}
> > diff --git a/drivers/net/idpf/idpf_rxtx.c
> > b/drivers/net/idpf/idpf_rxtx.c index
> > 3e3d81ca6d..64f2235580 100644
> > --- a/drivers/net/idpf/idpf_rxtx.c
> > +++ b/drivers/net/idpf/idpf_rxtx.c
> > @@ -74,7 +74,7 @@ idpf_dma_zone_reserve(struct rte_eth_dev *dev,
> > uint16_t queue_idx,
> >  			ring_size = RTE_ALIGN(len * sizeof(struct
> > idpf_flex_tx_sched_desc),
> >  					      IDPF_DMA_MEM_ALIGN);
> >  		else
> > -			ring_size = RTE_ALIGN(len * sizeof(struct
> > idpf_flex_tx_desc),
> > +			ring_size = RTE_ALIGN(len * sizeof(struct
> > idpf_base_tx_desc),
> >  					      IDPF_DMA_MEM_ALIGN);
> >  		rte_memcpy(ring_name, "idpf Tx ring", sizeof("idpf Tx ring"));
> >  		break;
> > --
> > 2.25.1
> 
> Acked-by: Wenjun Wu <wenjun1.wu@intel.com>

Applied to dpdk-next-net-intel.

Thanks
Qi

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v3] common/idpf: refactor single queue Tx function
  2023-09-13  7:45       ` Zhang, Qi Z
@ 2023-09-14  1:47         ` Zhang, Qi Z
  0 siblings, 0 replies; 16+ messages in thread
From: Zhang, Qi Z @ 2023-09-14  1:47 UTC (permalink / raw)
  To: Zhang, Qi Z, Wu, Wenjun1, Su, Simei, Wu, Jingjing, Xing, Beilei; +Cc: dev



> -----Original Message-----
> From: Zhang, Qi Z <qi.z.zhang@intel.com>
> Sent: Wednesday, September 13, 2023 3:46 PM
> To: Wu, Wenjun1 <wenjun1.wu@intel.com>; Su, Simei <simei.su@intel.com>;
> Wu, Jingjing <jingjing.wu@intel.com>; Xing, Beilei <beilei.xing@intel.com>
> Cc: dev@dpdk.org
> Subject: RE: [PATCH v3] common/idpf: refactor single queue Tx function
> 
> 
> 
> > -----Original Message-----
> > From: Wu, Wenjun1 <wenjun1.wu@intel.com>
> > Sent: Wednesday, September 13, 2023 1:57 PM
> > To: Su, Simei <simei.su@intel.com>; Wu, Jingjing
> > <jingjing.wu@intel.com>; Xing, Beilei <beilei.xing@intel.com>; Zhang,
> > Qi Z <qi.z.zhang@intel.com>
> > Cc: dev@dpdk.org
> > Subject: RE: [PATCH v3] common/idpf: refactor single queue Tx function
> >
> >
> >
> > > -----Original Message-----
> > > From: Su, Simei <simei.su@intel.com>
> > > Sent: Friday, September 8, 2023 6:28 PM
> > > To: Wu, Jingjing <jingjing.wu@intel.com>; Xing, Beilei
> > > <beilei.xing@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>
> > > Cc: dev@dpdk.org; Wu, Wenjun1 <wenjun1.wu@intel.com>; Su, Simei
> > > <simei.su@intel.com>
> > > Subject: [PATCH v3] common/idpf: refactor single queue Tx function
> > >
> > > This patch replaces flex Tx descriptor with base Tx descriptor to
> > > align with kernel driver practice.
> > >
> > > Signed-off-by: Simei Su <simei.su@intel.com>
> > > ---
> > > v3:
> > > * Change context TSO descriptor from base mode to flex mode.
> > >
> > > v2:
> > > * Refine commit title and commit log.
> > > * Remove redundant definition.
> > > * Modify base mode context TSO descriptor.
> > >
> > >  drivers/common/idpf/idpf_common_rxtx.c        | 39 +++++++++----------
> > >  drivers/common/idpf/idpf_common_rxtx.h        |  2 +-
> > >  drivers/common/idpf/idpf_common_rxtx_avx512.c | 37 +++++++++--------
> -
> > >  drivers/net/idpf/idpf_rxtx.c                  |  2 +-
> > >  4 files changed, 39 insertions(+), 41 deletions(-)
> > >
> > > diff --git a/drivers/common/idpf/idpf_common_rxtx.c
> > > b/drivers/common/idpf/idpf_common_rxtx.c
> > > index fc87e3e243..e6d2486272 100644
> > > --- a/drivers/common/idpf/idpf_common_rxtx.c
> > > +++ b/drivers/common/idpf/idpf_common_rxtx.c
> > > @@ -276,14 +276,14 @@ idpf_qc_single_tx_queue_reset(struct
> > > idpf_tx_queue *txq)
> > >  	}
> > >
> > >  	txe = txq->sw_ring;
> > > -	size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
> > > +	size = sizeof(struct idpf_base_tx_desc) * txq->nb_tx_desc;
> > >  	for (i = 0; i < size; i++)
> > >  		((volatile char *)txq->tx_ring)[i] = 0;
> > >
> > >  	prev = (uint16_t)(txq->nb_tx_desc - 1);
> > >  	for (i = 0; i < txq->nb_tx_desc; i++) {
> > > -		txq->tx_ring[i].qw1.cmd_dtype =
> > > -
> > > 	rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
> > > +		txq->tx_ring[i].qw1 =
> > > +
> > > 	rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
> > >  		txe[i].mbuf =  NULL;
> > >  		txe[i].last_id = i;
> > >  		txe[prev].next_id = i;
> > > @@ -1307,17 +1307,16 @@ idpf_xmit_cleanup(struct idpf_tx_queue
> *txq)
> > >  	uint16_t nb_tx_to_clean;
> > >  	uint16_t i;
> > >
> > > -	volatile struct idpf_flex_tx_desc *txd = txq->tx_ring;
> > > +	volatile struct idpf_base_tx_desc *txd = txq->tx_ring;
> > >
> > >  	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
> > >  	if (desc_to_clean_to >= nb_tx_desc)
> > >  		desc_to_clean_to = (uint16_t)(desc_to_clean_to -
> > nb_tx_desc);
> > >
> > >  	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
> > > -	/* In the writeback Tx desccriptor, the only significant fields are the 4-
> > > bit DTYPE */
> > > -	if ((txd[desc_to_clean_to].qw1.cmd_dtype &
> > > -	     rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) !=
> > > -	    rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
> > > +	if ((txd[desc_to_clean_to].qw1 &
> > > +	     rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
> > > +	    rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
> > >  		TX_LOG(DEBUG, "TX descriptor %4u is not done "
> > >  		       "(port=%d queue=%d)", desc_to_clean_to,
> > >  		       txq->port_id, txq->queue_id); @@ -1331,10 +1330,7 @@
> > > idpf_xmit_cleanup(struct idpf_tx_queue *txq)
> > >  		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
> > >  					    last_desc_cleaned);
> > >
> > > -	txd[desc_to_clean_to].qw1.cmd_dtype = 0;
> > > -	txd[desc_to_clean_to].qw1.buf_size = 0;
> > > -	for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++)
> > > -		txd[desc_to_clean_to].qw1.flex.raw[i] = 0;
> > > +	txd[desc_to_clean_to].qw1 = 0;
> > >
> > >  	txq->last_desc_cleaned = desc_to_clean_to;
> > >  	txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean); @@ -
> > > 1347,8 +1343,8 @@ uint16_t  idpf_dp_singleq_xmit_pkts(void
> > > *tx_queue, struct rte_mbuf **tx_pkts,
> > >  			  uint16_t nb_pkts)
> > >  {
> > > -	volatile struct idpf_flex_tx_desc *txd;
> > > -	volatile struct idpf_flex_tx_desc *txr;
> > > +	volatile struct idpf_base_tx_desc *txd;
> > > +	volatile struct idpf_base_tx_desc *txr;
> > >  	union idpf_tx_offload tx_offload = {0};
> > >  	struct idpf_tx_entry *txe, *txn;
> > >  	struct idpf_tx_entry *sw_ring;
> > > @@ -1356,6 +1352,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue,
> > struct
> > > rte_mbuf **tx_pkts,
> > >  	struct rte_mbuf *tx_pkt;
> > >  	struct rte_mbuf *m_seg;
> > >  	uint64_t buf_dma_addr;
> > > +	uint32_t td_offset;
> > >  	uint64_t ol_flags;
> > >  	uint16_t tx_last;
> > >  	uint16_t nb_used;
> > > @@ -1382,6 +1379,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue,
> > struct
> > > rte_mbuf **tx_pkts,
> > >
> > >  	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
> > >  		td_cmd = 0;
> > > +		td_offset = 0;
> > >
> > >  		tx_pkt = *tx_pkts++;
> > >  		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
> > > @@ -1462,9 +1460,10 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue,
> > > struct rte_mbuf **tx_pkts,
> > >  			slen = m_seg->data_len;
> > >  			buf_dma_addr = rte_mbuf_data_iova(m_seg);
> > >  			txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
> > > -			txd->qw1.buf_size = slen;
> > > -			txd->qw1.cmd_dtype =
> > > rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
> > > -
> > > IDPF_FLEX_TXD_QW1_DTYPE_S);
> > > +			txd->qw1 =
> > > rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DATA |
> > > +				((uint64_t)td_cmd  <<
> > > IDPF_TXD_QW1_CMD_S) |
> > > +				((uint64_t)td_offset <<
> > > IDPF_TXD_QW1_OFFSET_S) |
> > > +				((uint64_t)slen <<
> > > IDPF_TXD_QW1_TX_BUF_SZ_S));
> > >
> > >  			txe->last_id = tx_last;
> > >  			tx_id = txe->next_id;
> > > @@ -1473,7 +1472,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue,
> > struct
> > > rte_mbuf **tx_pkts,
> > >  		} while (m_seg);
> > >
> > >  		/* The last packet data descriptor needs End Of Packet (EOP)
> > */
> > > -		td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP;
> > > +		td_cmd |= IDPF_TX_DESC_CMD_EOP;
> > >  		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
> > >  		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
> > >
> > > @@ -1482,7 +1481,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue,
> > struct
> > > rte_mbuf **tx_pkts,
> > >  			       "%4u (port=%d queue=%d)",
> > >  			       tx_last, txq->port_id, txq->queue_id);
> > >
> > > -			td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS;
> > > +			td_cmd |= IDPF_TX_DESC_CMD_RS;
> > >
> > >  			/* Update txq RS bit counters */
> > >  			txq->nb_used = 0;
> > > @@ -1491,7 +1490,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue,
> > struct
> > > rte_mbuf **tx_pkts,
> > >  		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
> > >  			td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
> > >
> > > -		txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd <<
> > > IDPF_FLEX_TXD_QW1_CMD_S);
> > > +		txd->qw1 |= rte_cpu_to_le_16(td_cmd <<
> > > IDPF_TXD_QW1_CMD_S);
> > >  	}
> > >
> > >  end_of_tx:
> > > diff --git a/drivers/common/idpf/idpf_common_rxtx.h
> > > b/drivers/common/idpf/idpf_common_rxtx.h
> > > index 6cb83fc0a6..b49b1ed737 100644
> > > --- a/drivers/common/idpf/idpf_common_rxtx.h
> > > +++ b/drivers/common/idpf/idpf_common_rxtx.h
> > > @@ -157,7 +157,7 @@ struct idpf_tx_entry {
> > >  /* Structure associated with each TX queue. */  struct idpf_tx_queue {
> > >  	const struct rte_memzone *mz;		/* memzone for Tx ring */
> > > -	volatile struct idpf_flex_tx_desc *tx_ring;	/* Tx ring virtual
> > > address */
> > > +	volatile struct idpf_base_tx_desc *tx_ring;	/* Tx ring virtual
> > > address */
> > >  	volatile union {
> > >  		struct idpf_flex_tx_sched_desc *desc_ring;
> > >  		struct idpf_splitq_tx_compl_desc *compl_ring; diff --git
> > > a/drivers/common/idpf/idpf_common_rxtx_avx512.c
> > > b/drivers/common/idpf/idpf_common_rxtx_avx512.c
> > > index 81312617cc..afb0014a13 100644
> > > --- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
> > > +++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
> > > @@ -1005,7 +1005,7 @@ idpf_tx_singleq_free_bufs_avx512(struct
> > > idpf_tx_queue *txq)
> > >  	struct rte_mbuf *m, *free[txq->rs_thresh];
> > >
> > >  	/* check DD bits on threshold descriptor */
> > > -	if ((txq->tx_ring[txq->next_dd].qw1.cmd_dtype &
> > > +	if ((txq->tx_ring[txq->next_dd].qw1 &
> > >  			rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
> > >
> > > 	rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE))
> > >  		return 0;
> > > @@ -1113,15 +1113,14 @@ tx_backlog_entry_avx512(struct
> > > idpf_tx_vec_entry *txep,
> > >  		txep[i].mbuf = tx_pkts[i];
> > >  }
> > >
> > > -#define IDPF_FLEX_TXD_QW1_BUF_SZ_S 48  static __rte_always_inline
> > > void -idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
> > > +idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,
> > >  	  struct rte_mbuf *pkt, uint64_t flags)  {
> > >  	uint64_t high_qw =
> > > -		(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
> > > IDPF_FLEX_TXD_QW1_DTYPE_S |
> > > -		 ((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S) |
> > > -		 ((uint64_t)pkt->data_len <<
> > > IDPF_FLEX_TXD_QW1_BUF_SZ_S));
> > > +		(IDPF_TX_DESC_DTYPE_DATA |
> > > +		 ((uint64_t)flags  << IDPF_TXD_QW1_CMD_S) |
> > > +		 ((uint64_t)pkt->data_len << IDPF_TXD_QW1_TX_BUF_SZ_S));
> > >
> > >  	__m128i descriptor = _mm_set_epi64x(high_qw,
> > >  					    pkt->buf_iova + pkt->data_off);
> > @@ -1131,11 +1130,11 @@
> > > idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,  #define
> > > IDPF_TX_LEN_MASK 0xAA  #define IDPF_TX_OFF_MASK 0x55  static
> > > __rte_always_inline void - idpf_singleq_vtx(volatile struct
> > > idpf_flex_tx_desc *txdp,
> > > +idpf_singleq_vtx(volatile struct idpf_base_tx_desc *txdp,
> > >  	 struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)  {
> > > -	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_FLEX_DATA  |
> > > -			((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S));
> > > +	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA  |
> > > +			((uint64_t)flags  << IDPF_TXD_QW1_CMD_S));
> > >
> > >  	/* if unaligned on 32-bit boundary, do one to align */
> > >  	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) { @@ -1148,19
> > > +1147,19 @@ idpf_singleq_vtx(volatile struct idpf_flex_tx_desc
> > > +*txdp,
> > >  		uint64_t hi_qw3 =
> > >  			hi_qw_tmpl |
> > >  			((uint64_t)pkt[3]->data_len <<
> > > -			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> > > +			 IDPF_TXD_QW1_TX_BUF_SZ_S);
> > >  		uint64_t hi_qw2 =
> > >  			hi_qw_tmpl |
> > >  			((uint64_t)pkt[2]->data_len <<
> > > -			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> > > +			 IDPF_TXD_QW1_TX_BUF_SZ_S);
> > >  		uint64_t hi_qw1 =
> > >  			hi_qw_tmpl |
> > >  			((uint64_t)pkt[1]->data_len <<
> > > -			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> > > +			 IDPF_TXD_QW1_TX_BUF_SZ_S);
> > >  		uint64_t hi_qw0 =
> > >  			hi_qw_tmpl |
> > >  			((uint64_t)pkt[0]->data_len <<
> > > -			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> > > +			 IDPF_TXD_QW1_TX_BUF_SZ_S);
> > >
> > >  		__m512i desc0_3 =
> > >  			_mm512_set_epi64
> > > @@ -1187,11 +1186,11 @@
> > idpf_singleq_xmit_fixed_burst_vec_avx512(void
> > > *tx_queue, struct rte_mbuf **tx_pk
> > >  					 uint16_t nb_pkts)
> > >  {
> > >  	struct idpf_tx_queue *txq = tx_queue;
> > > -	volatile struct idpf_flex_tx_desc *txdp;
> > > +	volatile struct idpf_base_tx_desc *txdp;
> > >  	struct idpf_tx_vec_entry *txep;
> > >  	uint16_t n, nb_commit, tx_id;
> > > -	uint64_t flags = IDPF_TX_FLEX_DESC_CMD_EOP;
> > > -	uint64_t rs = IDPF_TX_FLEX_DESC_CMD_RS | flags;
> > > +	uint64_t flags = IDPF_TX_DESC_CMD_EOP;
> > > +	uint64_t rs = IDPF_TX_DESC_CMD_RS | flags;
> > >
> > >  	/* cross rx_thresh boundary is not allowed */
> > >  	nb_pkts = RTE_MIN(nb_pkts, txq->rs_thresh); @@ -1238,9 +1237,9
> > @@
> > > idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct
> > > rte_mbuf **tx_pk
> > >
> > >  	tx_id = (uint16_t)(tx_id + nb_commit);
> > >  	if (tx_id > txq->next_rs) {
> > > -		txq->tx_ring[txq->next_rs].qw1.cmd_dtype |=
> > > -
> > > 	rte_cpu_to_le_64(((uint64_t)IDPF_TX_FLEX_DESC_CMD_RS) <<
> > > -					 IDPF_FLEX_TXD_QW1_CMD_S);
> > > +		txq->tx_ring[txq->next_rs].qw1 |=
> > > +
> > > 	rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
> > > +					 IDPF_TXD_QW1_CMD_S);
> > >  		txq->next_rs =
> > >  			(uint16_t)(txq->next_rs + txq->rs_thresh);
> > >  	}
> > > diff --git a/drivers/net/idpf/idpf_rxtx.c
> > > b/drivers/net/idpf/idpf_rxtx.c index
> > > 3e3d81ca6d..64f2235580 100644
> > > --- a/drivers/net/idpf/idpf_rxtx.c
> > > +++ b/drivers/net/idpf/idpf_rxtx.c
> > > @@ -74,7 +74,7 @@ idpf_dma_zone_reserve(struct rte_eth_dev *dev,
> > > uint16_t queue_idx,
> > >  			ring_size = RTE_ALIGN(len * sizeof(struct
> > > idpf_flex_tx_sched_desc),
> > >  					      IDPF_DMA_MEM_ALIGN);
> > >  		else
> > > -			ring_size = RTE_ALIGN(len * sizeof(struct
> > > idpf_flex_tx_desc),
> > > +			ring_size = RTE_ALIGN(len * sizeof(struct
> > > idpf_base_tx_desc),
> > >  					      IDPF_DMA_MEM_ALIGN);
> > >  		rte_memcpy(ring_name, "idpf Tx ring", sizeof("idpf Tx ring"));
> > >  		break;
> > > --
> > > 2.25.1
> >
> > Acked-by: Wenjun Wu <wenjun1.wu@intel.com>
> 
> Applied to dpdk-next-net-intel.

Reverted due to a new change request.



^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v4 0/3] refactor single queue Tx data path
  2023-09-08 10:28   ` [PATCH v3] " Simei Su
  2023-09-13  5:57     ` Wu, Wenjun1
  2023-09-13  6:07     ` Xing, Beilei
@ 2023-09-14  1:50     ` Simei Su
  2023-09-14  1:50       ` [PATCH v4 1/3] common/idpf: " Simei Su
                         ` (4 more replies)
  2 siblings, 5 replies; 16+ messages in thread
From: Simei Su @ 2023-09-14  1:50 UTC (permalink / raw)
  To: jingjing.wu, beilei.xing, qi.z.zhang; +Cc: dev, wenjun1.wu, Simei Su

1. Refine the single queue Tx data path in the idpf common module.
2. Refine Tx queue setup for the idpf PMD.
3. Refine Tx queue setup for the cpfl PMD.

v4:
* Split one patch into patchset.
* Refine commit title and commit log.

v3:
* Change context TSO descriptor from base mode to flex mode.

v2:
* Refine commit title and commit log.
* Remove redundant definition.
* Modify base mode context TSO descriptor.

Simei Su (3):
  common/idpf: refactor single queue Tx data path
  net/idpf: refine Tx queue setup
  net/cpfl: refine Tx queue setup

 drivers/common/idpf/idpf_common_rxtx.c        | 39 +++++++++----------
 drivers/common/idpf/idpf_common_rxtx.h        |  2 +-
 drivers/common/idpf/idpf_common_rxtx_avx512.c | 37 +++++++++---------
 drivers/net/cpfl/cpfl_rxtx.c                  |  2 +-
 drivers/net/idpf/idpf_rxtx.c                  |  2 +-
 5 files changed, 40 insertions(+), 42 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v4 1/3] common/idpf: refactor single queue Tx data path
  2023-09-14  1:50     ` [PATCH v4 0/3] refactor single queue Tx data path Simei Su
@ 2023-09-14  1:50       ` Simei Su
  2023-09-14  1:50       ` [PATCH v4 2/3] net/idpf: refine Tx queue setup Simei Su
                         ` (3 subsequent siblings)
  4 siblings, 0 replies; 16+ messages in thread
From: Simei Su @ 2023-09-14  1:50 UTC (permalink / raw)
  To: jingjing.wu, beilei.xing, qi.z.zhang; +Cc: dev, wenjun1.wu, Simei Su

Currently, the single queue Tx data path uses the flex Tx data
descriptor, which has been changed in the latest idpf spec. This
patch replaces the flex Tx data descriptor with the base Tx data
descriptor for the single queue Tx data path.
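
As a rough sketch of what the data descriptor write looks like after this
change, the helper below packs the DTYPE, command flags, data-buffer offset
and buffer size into the single 64-bit qw1 word, the way
idpf_dp_singleq_xmit_pkts() does per mbuf segment. The EX_* shift and DTYPE
values are placeholders chosen for illustration, not the definitions from
the idpf headers; qw1 is a 64-bit little-endian field, so the result is
converted with rte_cpu_to_le_64() before being stored in the ring.

#include <stdint.h>
#include <rte_byteorder.h>

/* Placeholder values for illustration only -- the real IDPF_TXD_QW1_*
 * shifts and the DATA DTYPE value come from the idpf common headers.
 */
#define EX_TX_DESC_DTYPE_DATA    0ULL
#define EX_TXD_QW1_CMD_S         4
#define EX_TXD_QW1_OFFSET_S      16
#define EX_TXD_QW1_TX_BUF_SZ_S   34

/* Pack DTYPE, command flags, offset and buffer size into the single
 * 64-bit qw1 word of a base Tx data descriptor, ready to be written
 * to the (little-endian) descriptor ring.
 */
static inline uint64_t
ex_base_txd_qw1(uint64_t td_cmd, uint64_t td_offset, uint64_t slen)
{
	return rte_cpu_to_le_64(EX_TX_DESC_DTYPE_DATA |
				(td_cmd << EX_TXD_QW1_CMD_S) |
				(td_offset << EX_TXD_QW1_OFFSET_S) |
				(slen << EX_TXD_QW1_TX_BUF_SZ_S));
}

With the flex descriptor this information was spread across the 16-bit
cmd_dtype and buf_size sub-fields; with the base descriptor both the
transmit-side writes and the completion check go through this one quadword,
which is why the DD test now masks qw1 with IDPF_TXD_QW1_DTYPE_M and
compares against IDPF_TX_DESC_DTYPE_DESC_DONE.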

Signed-off-by: Simei Su <simei.su@intel.com>
Acked-by: Wenjun Wu <wenjun1.wu@intel.com>
---
 drivers/common/idpf/idpf_common_rxtx.c        | 39 +++++++++----------
 drivers/common/idpf/idpf_common_rxtx.h        |  2 +-
 drivers/common/idpf/idpf_common_rxtx_avx512.c | 37 +++++++++---------
 3 files changed, 38 insertions(+), 40 deletions(-)

diff --git a/drivers/common/idpf/idpf_common_rxtx.c b/drivers/common/idpf/idpf_common_rxtx.c
index fc87e3e243..e6d2486272 100644
--- a/drivers/common/idpf/idpf_common_rxtx.c
+++ b/drivers/common/idpf/idpf_common_rxtx.c
@@ -276,14 +276,14 @@ idpf_qc_single_tx_queue_reset(struct idpf_tx_queue *txq)
 	}
 
 	txe = txq->sw_ring;
-	size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
+	size = sizeof(struct idpf_base_tx_desc) * txq->nb_tx_desc;
 	for (i = 0; i < size; i++)
 		((volatile char *)txq->tx_ring)[i] = 0;
 
 	prev = (uint16_t)(txq->nb_tx_desc - 1);
 	for (i = 0; i < txq->nb_tx_desc; i++) {
-		txq->tx_ring[i].qw1.cmd_dtype =
-			rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
+		txq->tx_ring[i].qw1 =
+			rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
 		txe[i].mbuf =  NULL;
 		txe[i].last_id = i;
 		txe[prev].next_id = i;
@@ -1307,17 +1307,16 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
 	uint16_t nb_tx_to_clean;
 	uint16_t i;
 
-	volatile struct idpf_flex_tx_desc *txd = txq->tx_ring;
+	volatile struct idpf_base_tx_desc *txd = txq->tx_ring;
 
 	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
 	if (desc_to_clean_to >= nb_tx_desc)
 		desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
 
 	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
-	/* In the writeback Tx desccriptor, the only significant fields are the 4-bit DTYPE */
-	if ((txd[desc_to_clean_to].qw1.cmd_dtype &
-	     rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) !=
-	    rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
+	if ((txd[desc_to_clean_to].qw1 &
+	     rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
+	    rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
 		TX_LOG(DEBUG, "TX descriptor %4u is not done "
 		       "(port=%d queue=%d)", desc_to_clean_to,
 		       txq->port_id, txq->queue_id);
@@ -1331,10 +1330,7 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
 		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
 					    last_desc_cleaned);
 
-	txd[desc_to_clean_to].qw1.cmd_dtype = 0;
-	txd[desc_to_clean_to].qw1.buf_size = 0;
-	for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++)
-		txd[desc_to_clean_to].qw1.flex.raw[i] = 0;
+	txd[desc_to_clean_to].qw1 = 0;
 
 	txq->last_desc_cleaned = desc_to_clean_to;
 	txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean);
@@ -1347,8 +1343,8 @@ uint16_t
 idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			  uint16_t nb_pkts)
 {
-	volatile struct idpf_flex_tx_desc *txd;
-	volatile struct idpf_flex_tx_desc *txr;
+	volatile struct idpf_base_tx_desc *txd;
+	volatile struct idpf_base_tx_desc *txr;
 	union idpf_tx_offload tx_offload = {0};
 	struct idpf_tx_entry *txe, *txn;
 	struct idpf_tx_entry *sw_ring;
@@ -1356,6 +1352,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct rte_mbuf *tx_pkt;
 	struct rte_mbuf *m_seg;
 	uint64_t buf_dma_addr;
+	uint32_t td_offset;
 	uint64_t ol_flags;
 	uint16_t tx_last;
 	uint16_t nb_used;
@@ -1382,6 +1379,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
 		td_cmd = 0;
+		td_offset = 0;
 
 		tx_pkt = *tx_pkts++;
 		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
@@ -1462,9 +1460,10 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			slen = m_seg->data_len;
 			buf_dma_addr = rte_mbuf_data_iova(m_seg);
 			txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
-			txd->qw1.buf_size = slen;
-			txd->qw1.cmd_dtype = rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
-							      IDPF_FLEX_TXD_QW1_DTYPE_S);
+			txd->qw1 = rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DATA |
+				((uint64_t)td_cmd  << IDPF_TXD_QW1_CMD_S) |
+				((uint64_t)td_offset << IDPF_TXD_QW1_OFFSET_S) |
+				((uint64_t)slen << IDPF_TXD_QW1_TX_BUF_SZ_S));
 
 			txe->last_id = tx_last;
 			tx_id = txe->next_id;
@@ -1473,7 +1472,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		} while (m_seg);
 
 		/* The last packet data descriptor needs End Of Packet (EOP) */
-		td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP;
+		td_cmd |= IDPF_TX_DESC_CMD_EOP;
 		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
 		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
 
@@ -1482,7 +1481,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			       "%4u (port=%d queue=%d)",
 			       tx_last, txq->port_id, txq->queue_id);
 
-			td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS;
+			td_cmd |= IDPF_TX_DESC_CMD_RS;
 
 			/* Update txq RS bit counters */
 			txq->nb_used = 0;
@@ -1491,7 +1490,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
 			td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
 
-		txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd << IDPF_FLEX_TXD_QW1_CMD_S);
+		txd->qw1 |= rte_cpu_to_le_16(td_cmd << IDPF_TXD_QW1_CMD_S);
 	}
 
 end_of_tx:
diff --git a/drivers/common/idpf/idpf_common_rxtx.h b/drivers/common/idpf/idpf_common_rxtx.h
index 6cb83fc0a6..b49b1ed737 100644
--- a/drivers/common/idpf/idpf_common_rxtx.h
+++ b/drivers/common/idpf/idpf_common_rxtx.h
@@ -157,7 +157,7 @@ struct idpf_tx_entry {
 /* Structure associated with each TX queue. */
 struct idpf_tx_queue {
 	const struct rte_memzone *mz;		/* memzone for Tx ring */
-	volatile struct idpf_flex_tx_desc *tx_ring;	/* Tx ring virtual address */
+	volatile struct idpf_base_tx_desc *tx_ring;	/* Tx ring virtual address */
 	volatile union {
 		struct idpf_flex_tx_sched_desc *desc_ring;
 		struct idpf_splitq_tx_compl_desc *compl_ring;
diff --git a/drivers/common/idpf/idpf_common_rxtx_avx512.c b/drivers/common/idpf/idpf_common_rxtx_avx512.c
index 2ac46fb1d2..f65e8d512b 100644
--- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
@@ -1005,7 +1005,7 @@ idpf_tx_singleq_free_bufs_avx512(struct idpf_tx_queue *txq)
 	struct rte_mbuf *m, *free[txq->rs_thresh];
 
 	/* check DD bits on threshold descriptor */
-	if ((txq->tx_ring[txq->next_dd].qw1.cmd_dtype &
+	if ((txq->tx_ring[txq->next_dd].qw1 &
 			rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
 			rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE))
 		return 0;
@@ -1113,15 +1113,14 @@ tx_backlog_entry_avx512(struct idpf_tx_vec_entry *txep,
 		txep[i].mbuf = tx_pkts[i];
 }
 
-#define IDPF_FLEX_TXD_QW1_BUF_SZ_S 48
 static __rte_always_inline void
-idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
+idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,
 	  struct rte_mbuf *pkt, uint64_t flags)
 {
 	uint64_t high_qw =
-		(IDPF_TX_DESC_DTYPE_FLEX_DATA << IDPF_FLEX_TXD_QW1_DTYPE_S |
-		 ((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S) |
-		 ((uint64_t)pkt->data_len << IDPF_FLEX_TXD_QW1_BUF_SZ_S));
+		(IDPF_TX_DESC_DTYPE_DATA |
+		 ((uint64_t)flags  << IDPF_TXD_QW1_CMD_S) |
+		 ((uint64_t)pkt->data_len << IDPF_TXD_QW1_TX_BUF_SZ_S));
 
 	__m128i descriptor = _mm_set_epi64x(high_qw,
 					    pkt->buf_iova + pkt->data_off);
@@ -1131,11 +1130,11 @@ idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
 #define IDPF_TX_LEN_MASK 0xAA
 #define IDPF_TX_OFF_MASK 0x55
 static __rte_always_inline void
-idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
+idpf_singleq_vtx(volatile struct idpf_base_tx_desc *txdp,
 	 struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
 {
-	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_FLEX_DATA  |
-			((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S));
+	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA  |
+			((uint64_t)flags  << IDPF_TXD_QW1_CMD_S));
 
 	/* if unaligned on 32-bit boundary, do one to align */
 	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
@@ -1148,19 +1147,19 @@ idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
 		uint64_t hi_qw3 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[3]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw2 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[2]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw1 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[1]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw0 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[0]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 
 		__m512i desc0_3 =
 			_mm512_set_epi64
@@ -1187,11 +1186,11 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
 					 uint16_t nb_pkts)
 {
 	struct idpf_tx_queue *txq = tx_queue;
-	volatile struct idpf_flex_tx_desc *txdp;
+	volatile struct idpf_base_tx_desc *txdp;
 	struct idpf_tx_vec_entry *txep;
 	uint16_t n, nb_commit, tx_id;
-	uint64_t flags = IDPF_TX_FLEX_DESC_CMD_EOP;
-	uint64_t rs = IDPF_TX_FLEX_DESC_CMD_RS | flags;
+	uint64_t flags = IDPF_TX_DESC_CMD_EOP;
+	uint64_t rs = IDPF_TX_DESC_CMD_RS | flags;
 
 	/* cross rx_thresh boundary is not allowed */
 	nb_pkts = RTE_MIN(nb_pkts, txq->rs_thresh);
@@ -1238,9 +1237,9 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
 
 	tx_id = (uint16_t)(tx_id + nb_commit);
 	if (tx_id > txq->next_rs) {
-		txq->tx_ring[txq->next_rs].qw1.cmd_dtype |=
-			rte_cpu_to_le_64(((uint64_t)IDPF_TX_FLEX_DESC_CMD_RS) <<
-					 IDPF_FLEX_TXD_QW1_CMD_S);
+		txq->tx_ring[txq->next_rs].qw1 |=
+			rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
+					 IDPF_TXD_QW1_CMD_S);
 		txq->next_rs =
 			(uint16_t)(txq->next_rs + txq->rs_thresh);
 	}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v4 2/3] net/idpf: refine Tx queue setup
  2023-09-14  1:50     ` [PATCH v4 0/3] refactor single queue Tx data path Simei Su
  2023-09-14  1:50       ` [PATCH v4 1/3] common/idpf: " Simei Su
@ 2023-09-14  1:50       ` Simei Su
  2023-09-14  1:50       ` [PATCH v4 3/3] net/cpfl: " Simei Su
                         ` (2 subsequent siblings)
  4 siblings, 0 replies; 16+ messages in thread
From: Simei Su @ 2023-09-14  1:50 UTC (permalink / raw)
  To: jingjing.wu, beilei.xing, qi.z.zhang; +Cc: dev, wenjun1.wu, Simei Su

This patch refines the single queue Tx setup to align with the Tx data path.
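
The point is that the memzone reserved for the single queue Tx ring must be
sized with the same descriptor structure that the data path now writes. A
minimal sketch of the size calculation, with a hypothetical ex_base_tx_desc
standing in for the real 16-byte base descriptor (64-bit buffer address plus
64-bit qw1):

#include <stdint.h>
#include <rte_common.h>		/* RTE_ALIGN() */

/* Hypothetical stand-in for struct idpf_base_tx_desc. */
struct ex_base_tx_desc {
	uint64_t buf_addr;	/* data buffer DMA address */
	uint64_t qw1;		/* DTYPE, command, offset, buffer size */
};

/* Bytes to reserve for a single queue Tx ring of nb_desc descriptors,
 * rounded up to the DMA alignment, mirroring idpf_dma_zone_reserve().
 */
static inline size_t
ex_tx_ring_size(uint16_t nb_desc, size_t dma_align)
{
	return RTE_ALIGN(nb_desc * sizeof(struct ex_base_tx_desc), dma_align);
}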

Signed-off-by: Simei Su <simei.su@intel.com>
Acked-by: Wenjun Wu <wenjun1.wu@intel.com>
---
 drivers/net/idpf/idpf_rxtx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
index 3e3d81ca6d..64f2235580 100644
--- a/drivers/net/idpf/idpf_rxtx.c
+++ b/drivers/net/idpf/idpf_rxtx.c
@@ -74,7 +74,7 @@ idpf_dma_zone_reserve(struct rte_eth_dev *dev, uint16_t queue_idx,
 			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_sched_desc),
 					      IDPF_DMA_MEM_ALIGN);
 		else
-			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_desc),
+			ring_size = RTE_ALIGN(len * sizeof(struct idpf_base_tx_desc),
 					      IDPF_DMA_MEM_ALIGN);
 		rte_memcpy(ring_name, "idpf Tx ring", sizeof("idpf Tx ring"));
 		break;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v4 3/3] net/cpfl: refine Tx queue setup
  2023-09-14  1:50     ` [PATCH v4 0/3] refactor single queue Tx data path Simei Su
  2023-09-14  1:50       ` [PATCH v4 1/3] common/idpf: " Simei Su
  2023-09-14  1:50       ` [PATCH v4 2/3] net/idpf: refine Tx queue setup Simei Su
@ 2023-09-14  1:50       ` Simei Su
  2023-09-14  1:54       ` [PATCH v4 0/3] refactor single queue Tx data path Xing, Beilei
  2023-09-14  6:37       ` [PATCH v5] common/idpf: " Simei Su
  4 siblings, 0 replies; 16+ messages in thread
From: Simei Su @ 2023-09-14  1:50 UTC (permalink / raw)
  To: jingjing.wu, beilei.xing, qi.z.zhang; +Cc: dev, wenjun1.wu, Simei Su

This patch refines the single queue Tx setup to align with the Tx data path.

Signed-off-by: Simei Su <simei.su@intel.com>
Acked-by: Wenjun Wu <wenjun1.wu@intel.com>
---
 drivers/net/cpfl/cpfl_rxtx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c
index 2ef6871a85..ab8bec4645 100644
--- a/drivers/net/cpfl/cpfl_rxtx.c
+++ b/drivers/net/cpfl/cpfl_rxtx.c
@@ -135,7 +135,7 @@ cpfl_dma_zone_reserve(struct rte_eth_dev *dev, uint16_t queue_idx,
 			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_sched_desc),
 					      CPFL_DMA_MEM_ALIGN);
 		else
-			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_desc),
+			ring_size = RTE_ALIGN(len * sizeof(struct idpf_base_tx_desc),
 					      CPFL_DMA_MEM_ALIGN);
 		memcpy(ring_name, "cpfl Tx ring", sizeof("cpfl Tx ring"));
 		break;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v4 0/3] refactor single queue Tx data path
  2023-09-14  1:50     ` [PATCH v4 0/3] refactor single queue Tx data path Simei Su
                         ` (2 preceding siblings ...)
  2023-09-14  1:50       ` [PATCH v4 3/3] net/cpfl: " Simei Su
@ 2023-09-14  1:54       ` Xing, Beilei
  2023-09-14  6:37       ` [PATCH v5] common/idpf: " Simei Su
  4 siblings, 0 replies; 16+ messages in thread
From: Xing, Beilei @ 2023-09-14  1:54 UTC (permalink / raw)
  To: Su, Simei, Wu, Jingjing, Zhang, Qi Z; +Cc: dev, Wu, Wenjun1



> -----Original Message-----
> From: Su, Simei <simei.su@intel.com>
> Sent: Thursday, September 14, 2023 9:50 AM
> To: Wu, Jingjing <jingjing.wu@intel.com>; Xing, Beilei
> <beilei.xing@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>
> Cc: dev@dpdk.org; Wu, Wenjun1 <wenjun1.wu@intel.com>; Su, Simei
> <simei.su@intel.com>
> Subject: [PATCH v4 0/3] refactor single queue Tx data path
> 
> 1. Refine single queue Tx data path for idpf common module.
> 2. Refine Tx queue setup for idpf pmd.
> 3. Refine Tx queue setup for cpfl pmd.
> 
> v4:
> * Split one patch into patchset.
> * Refine commit title and commit log.
> 
> v3:
> * Change context TSO descriptor from base mode to flex mode.
> 
> v2:
> * Refine commit title and commit log.
> * Remove redundant definition.
> * Modify base mode context TSO descriptor.
> 
> Simei Su (3):
>   common/idpf: refactor single queue Tx data path
>   net/idpf: refine Tx queue setup
>   net/cpfl: refine Tx queue setup
> 
>  drivers/common/idpf/idpf_common_rxtx.c        | 39 +++++++++----------
>  drivers/common/idpf/idpf_common_rxtx.h        |  2 +-
>  drivers/common/idpf/idpf_common_rxtx_avx512.c | 37 +++++++++---------
>  drivers/net/cpfl/cpfl_rxtx.c                  |  2 +-
>  drivers/net/idpf/idpf_rxtx.c                  |  2 +-
>  5 files changed, 40 insertions(+), 42 deletions(-)
> 
> --
> 2.25.1

Acked-by: Beilei Xing <beilei.xing@intel.com>


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v5] common/idpf: refactor single queue Tx data path
  2023-09-14  1:50     ` [PATCH v4 0/3] refactor single queue Tx data path Simei Su
                         ` (3 preceding siblings ...)
  2023-09-14  1:54       ` [PATCH v4 0/3] refactor single queue Tx data path Xing, Beilei
@ 2023-09-14  6:37       ` Simei Su
  2023-09-14 11:08         ` Zhang, Qi Z
  4 siblings, 1 reply; 16+ messages in thread
From: Simei Su @ 2023-09-14  6:37 UTC (permalink / raw)
  To: jingjing.wu, beilei.xing, qi.z.zhang; +Cc: dev, wenjun1.wu, Simei Su

Currently, the single queue Tx data path uses the flex Tx data
descriptor (DTYPE3), which has been removed in the latest idpf spec.
This patch replaces the flex Tx data descriptor with the base Tx
data descriptor for the single queue Tx data path and refines the
single queue Tx setup to align with the Tx data path.
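
For orientation, the sketch below contrasts the two descriptor layouts. The
flex layout is reconstructed from the fields touched by the removed code and
is only an approximation (field order and the raw[] size are assumptions);
the base layout follows from the buf_addr/qw1 accesses added by this patch.

#include <stdint.h>

/* Approximate shape of the flex Tx data descriptor (DTYPE3) being
 * removed: the second quadword is split into sub-fields that the old
 * code addressed individually (qw1.cmd_dtype, qw1.buf_size, qw1.flex).
 */
struct ex_flex_tx_desc {
	uint64_t buf_addr;
	struct {
		uint16_t cmd_dtype;	/* 16-bit DTYPE + command flags */
		uint16_t buf_size;
		union {
			uint8_t raw[4];	/* flex metadata bytes */
		} flex;
	} qw1;
};

/* Base Tx data descriptor used after this patch: the whole second
 * quadword is one 64-bit field carrying DTYPE, command flags, offset
 * and buffer size, and it is also where the DESC_DONE writeback lands.
 */
struct ex_base_tx_desc {
	uint64_t buf_addr;
	uint64_t qw1;
};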

Signed-off-by: Simei Su <simei.su@intel.com>
Acked-by: Wenjun Wu <wenjun1.wu@intel.com>
Acked-by: Beilei Xing <beilei.xing@intel.com>
---
 drivers/common/idpf/idpf_common_rxtx.c        | 39 +++++++++----------
 drivers/common/idpf/idpf_common_rxtx.h        |  2 +-
 drivers/common/idpf/idpf_common_rxtx_avx512.c | 37 +++++++++---------
 drivers/net/cpfl/cpfl_rxtx.c                  |  2 +-
 drivers/net/idpf/idpf_rxtx.c                  |  2 +-
 5 files changed, 40 insertions(+), 42 deletions(-)

diff --git a/drivers/common/idpf/idpf_common_rxtx.c b/drivers/common/idpf/idpf_common_rxtx.c
index fc87e3e243..e6d2486272 100644
--- a/drivers/common/idpf/idpf_common_rxtx.c
+++ b/drivers/common/idpf/idpf_common_rxtx.c
@@ -276,14 +276,14 @@ idpf_qc_single_tx_queue_reset(struct idpf_tx_queue *txq)
 	}
 
 	txe = txq->sw_ring;
-	size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
+	size = sizeof(struct idpf_base_tx_desc) * txq->nb_tx_desc;
 	for (i = 0; i < size; i++)
 		((volatile char *)txq->tx_ring)[i] = 0;
 
 	prev = (uint16_t)(txq->nb_tx_desc - 1);
 	for (i = 0; i < txq->nb_tx_desc; i++) {
-		txq->tx_ring[i].qw1.cmd_dtype =
-			rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
+		txq->tx_ring[i].qw1 =
+			rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
 		txe[i].mbuf =  NULL;
 		txe[i].last_id = i;
 		txe[prev].next_id = i;
@@ -1307,17 +1307,16 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
 	uint16_t nb_tx_to_clean;
 	uint16_t i;
 
-	volatile struct idpf_flex_tx_desc *txd = txq->tx_ring;
+	volatile struct idpf_base_tx_desc *txd = txq->tx_ring;
 
 	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
 	if (desc_to_clean_to >= nb_tx_desc)
 		desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
 
 	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
-	/* In the writeback Tx desccriptor, the only significant fields are the 4-bit DTYPE */
-	if ((txd[desc_to_clean_to].qw1.cmd_dtype &
-	     rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) !=
-	    rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
+	if ((txd[desc_to_clean_to].qw1 &
+	     rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
+	    rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
 		TX_LOG(DEBUG, "TX descriptor %4u is not done "
 		       "(port=%d queue=%d)", desc_to_clean_to,
 		       txq->port_id, txq->queue_id);
@@ -1331,10 +1330,7 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
 		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
 					    last_desc_cleaned);
 
-	txd[desc_to_clean_to].qw1.cmd_dtype = 0;
-	txd[desc_to_clean_to].qw1.buf_size = 0;
-	for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++)
-		txd[desc_to_clean_to].qw1.flex.raw[i] = 0;
+	txd[desc_to_clean_to].qw1 = 0;
 
 	txq->last_desc_cleaned = desc_to_clean_to;
 	txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean);
@@ -1347,8 +1343,8 @@ uint16_t
 idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			  uint16_t nb_pkts)
 {
-	volatile struct idpf_flex_tx_desc *txd;
-	volatile struct idpf_flex_tx_desc *txr;
+	volatile struct idpf_base_tx_desc *txd;
+	volatile struct idpf_base_tx_desc *txr;
 	union idpf_tx_offload tx_offload = {0};
 	struct idpf_tx_entry *txe, *txn;
 	struct idpf_tx_entry *sw_ring;
@@ -1356,6 +1352,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct rte_mbuf *tx_pkt;
 	struct rte_mbuf *m_seg;
 	uint64_t buf_dma_addr;
+	uint32_t td_offset;
 	uint64_t ol_flags;
 	uint16_t tx_last;
 	uint16_t nb_used;
@@ -1382,6 +1379,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
 		td_cmd = 0;
+		td_offset = 0;
 
 		tx_pkt = *tx_pkts++;
 		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
@@ -1462,9 +1460,10 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			slen = m_seg->data_len;
 			buf_dma_addr = rte_mbuf_data_iova(m_seg);
 			txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
-			txd->qw1.buf_size = slen;
-			txd->qw1.cmd_dtype = rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
-							      IDPF_FLEX_TXD_QW1_DTYPE_S);
+			txd->qw1 = rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DATA |
+				((uint64_t)td_cmd  << IDPF_TXD_QW1_CMD_S) |
+				((uint64_t)td_offset << IDPF_TXD_QW1_OFFSET_S) |
+				((uint64_t)slen << IDPF_TXD_QW1_TX_BUF_SZ_S));
 
 			txe->last_id = tx_last;
 			tx_id = txe->next_id;
@@ -1473,7 +1472,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		} while (m_seg);
 
 		/* The last packet data descriptor needs End Of Packet (EOP) */
-		td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP;
+		td_cmd |= IDPF_TX_DESC_CMD_EOP;
 		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
 		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
 
@@ -1482,7 +1481,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			       "%4u (port=%d queue=%d)",
 			       tx_last, txq->port_id, txq->queue_id);
 
-			td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS;
+			td_cmd |= IDPF_TX_DESC_CMD_RS;
 
 			/* Update txq RS bit counters */
 			txq->nb_used = 0;
@@ -1491,7 +1490,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
 			td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
 
-		txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd << IDPF_FLEX_TXD_QW1_CMD_S);
+		txd->qw1 |= rte_cpu_to_le_16(td_cmd << IDPF_TXD_QW1_CMD_S);
 	}
 
 end_of_tx:
diff --git a/drivers/common/idpf/idpf_common_rxtx.h b/drivers/common/idpf/idpf_common_rxtx.h
index 6cb83fc0a6..b49b1ed737 100644
--- a/drivers/common/idpf/idpf_common_rxtx.h
+++ b/drivers/common/idpf/idpf_common_rxtx.h
@@ -157,7 +157,7 @@ struct idpf_tx_entry {
 /* Structure associated with each TX queue. */
 struct idpf_tx_queue {
 	const struct rte_memzone *mz;		/* memzone for Tx ring */
-	volatile struct idpf_flex_tx_desc *tx_ring;	/* Tx ring virtual address */
+	volatile struct idpf_base_tx_desc *tx_ring;	/* Tx ring virtual address */
 	volatile union {
 		struct idpf_flex_tx_sched_desc *desc_ring;
 		struct idpf_splitq_tx_compl_desc *compl_ring;
diff --git a/drivers/common/idpf/idpf_common_rxtx_avx512.c b/drivers/common/idpf/idpf_common_rxtx_avx512.c
index 2ac46fb1d2..f65e8d512b 100644
--- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
@@ -1005,7 +1005,7 @@ idpf_tx_singleq_free_bufs_avx512(struct idpf_tx_queue *txq)
 	struct rte_mbuf *m, *free[txq->rs_thresh];
 
 	/* check DD bits on threshold descriptor */
-	if ((txq->tx_ring[txq->next_dd].qw1.cmd_dtype &
+	if ((txq->tx_ring[txq->next_dd].qw1 &
 			rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
 			rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE))
 		return 0;
@@ -1113,15 +1113,14 @@ tx_backlog_entry_avx512(struct idpf_tx_vec_entry *txep,
 		txep[i].mbuf = tx_pkts[i];
 }
 
-#define IDPF_FLEX_TXD_QW1_BUF_SZ_S 48
 static __rte_always_inline void
-idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
+idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,
 	  struct rte_mbuf *pkt, uint64_t flags)
 {
 	uint64_t high_qw =
-		(IDPF_TX_DESC_DTYPE_FLEX_DATA << IDPF_FLEX_TXD_QW1_DTYPE_S |
-		 ((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S) |
-		 ((uint64_t)pkt->data_len << IDPF_FLEX_TXD_QW1_BUF_SZ_S));
+		(IDPF_TX_DESC_DTYPE_DATA |
+		 ((uint64_t)flags  << IDPF_TXD_QW1_CMD_S) |
+		 ((uint64_t)pkt->data_len << IDPF_TXD_QW1_TX_BUF_SZ_S));
 
 	__m128i descriptor = _mm_set_epi64x(high_qw,
 					    pkt->buf_iova + pkt->data_off);
@@ -1131,11 +1130,11 @@ idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
 #define IDPF_TX_LEN_MASK 0xAA
 #define IDPF_TX_OFF_MASK 0x55
 static __rte_always_inline void
-idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
+idpf_singleq_vtx(volatile struct idpf_base_tx_desc *txdp,
 	 struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
 {
-	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_FLEX_DATA  |
-			((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S));
+	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA  |
+			((uint64_t)flags  << IDPF_TXD_QW1_CMD_S));
 
 	/* if unaligned on 32-bit boundary, do one to align */
 	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
@@ -1148,19 +1147,19 @@ idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
 		uint64_t hi_qw3 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[3]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw2 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[2]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw1 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[1]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw0 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[0]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 
 		__m512i desc0_3 =
 			_mm512_set_epi64
@@ -1187,11 +1186,11 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
 					 uint16_t nb_pkts)
 {
 	struct idpf_tx_queue *txq = tx_queue;
-	volatile struct idpf_flex_tx_desc *txdp;
+	volatile struct idpf_base_tx_desc *txdp;
 	struct idpf_tx_vec_entry *txep;
 	uint16_t n, nb_commit, tx_id;
-	uint64_t flags = IDPF_TX_FLEX_DESC_CMD_EOP;
-	uint64_t rs = IDPF_TX_FLEX_DESC_CMD_RS | flags;
+	uint64_t flags = IDPF_TX_DESC_CMD_EOP;
+	uint64_t rs = IDPF_TX_DESC_CMD_RS | flags;
 
 	/* cross rx_thresh boundary is not allowed */
 	nb_pkts = RTE_MIN(nb_pkts, txq->rs_thresh);
@@ -1238,9 +1237,9 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
 
 	tx_id = (uint16_t)(tx_id + nb_commit);
 	if (tx_id > txq->next_rs) {
-		txq->tx_ring[txq->next_rs].qw1.cmd_dtype |=
-			rte_cpu_to_le_64(((uint64_t)IDPF_TX_FLEX_DESC_CMD_RS) <<
-					 IDPF_FLEX_TXD_QW1_CMD_S);
+		txq->tx_ring[txq->next_rs].qw1 |=
+			rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
+					 IDPF_TXD_QW1_CMD_S);
 		txq->next_rs =
 			(uint16_t)(txq->next_rs + txq->rs_thresh);
 	}
diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c
index 2ef6871a85..ab8bec4645 100644
--- a/drivers/net/cpfl/cpfl_rxtx.c
+++ b/drivers/net/cpfl/cpfl_rxtx.c
@@ -135,7 +135,7 @@ cpfl_dma_zone_reserve(struct rte_eth_dev *dev, uint16_t queue_idx,
 			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_sched_desc),
 					      CPFL_DMA_MEM_ALIGN);
 		else
-			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_desc),
+			ring_size = RTE_ALIGN(len * sizeof(struct idpf_base_tx_desc),
 					      CPFL_DMA_MEM_ALIGN);
 		memcpy(ring_name, "cpfl Tx ring", sizeof("cpfl Tx ring"));
 		break;
diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
index 3e3d81ca6d..64f2235580 100644
--- a/drivers/net/idpf/idpf_rxtx.c
+++ b/drivers/net/idpf/idpf_rxtx.c
@@ -74,7 +74,7 @@ idpf_dma_zone_reserve(struct rte_eth_dev *dev, uint16_t queue_idx,
 			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_sched_desc),
 					      IDPF_DMA_MEM_ALIGN);
 		else
-			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_desc),
+			ring_size = RTE_ALIGN(len * sizeof(struct idpf_base_tx_desc),
 					      IDPF_DMA_MEM_ALIGN);
 		rte_memcpy(ring_name, "idpf Tx ring", sizeof("idpf Tx ring"));
 		break;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v5] common/idpf: refactor single queue Tx data path
  2023-09-14  6:37       ` [PATCH v5] common/idpf: " Simei Su
@ 2023-09-14 11:08         ` Zhang, Qi Z
  0 siblings, 0 replies; 16+ messages in thread
From: Zhang, Qi Z @ 2023-09-14 11:08 UTC (permalink / raw)
  To: Su, Simei, Wu, Jingjing, Xing, Beilei; +Cc: dev, Wu, Wenjun1



> -----Original Message-----
> From: Su, Simei <simei.su@intel.com>
> Sent: Thursday, September 14, 2023 2:37 PM
> To: Wu, Jingjing <jingjing.wu@intel.com>; Xing, Beilei <beilei.xing@intel.com>;
> Zhang, Qi Z <qi.z.zhang@intel.com>
> Cc: dev@dpdk.org; Wu, Wenjun1 <wenjun1.wu@intel.com>; Su, Simei
> <simei.su@intel.com>
> Subject: [PATCH v5] common/idpf: refactor single queue Tx data path
> 
> Currently, single queue Tx data path uses flex Tx data
> descriptor(DTYPE3) which is removed in the latest idpf spec.
> This patch replaces flex Tx data descriptor with base Tx data descriptor for
> single queue Tx data path and refines Tx single queue setup to align with Tx
> data path.
> 
> Signed-off-by: Simei Su <simei.su@intel.com>
> Acked-by: Wenjun Wu <wenjun1.wu@intel.com>
> Acked-by: Beilei Xing <beilei.xing@intel.com>

Applied to dpdk-next-net-intel.

Thanks
Qi


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2023-09-14 11:08 UTC | newest]

Thread overview: 16+ messages
2023-08-25  7:21 [PATCH] common/idpf: rework single queue Tx function Simei Su
2023-08-25  7:48 ` Wu, Wenjun1
2023-08-25  8:14 ` Zhang, Qi Z
2023-09-04  7:02 ` [PATCH v2] common/idpf: refactor " Simei Su
2023-09-08 10:28   ` [PATCH v3] " Simei Su
2023-09-13  5:57     ` Wu, Wenjun1
2023-09-13  7:45       ` Zhang, Qi Z
2023-09-14  1:47         ` Zhang, Qi Z
2023-09-13  6:07     ` Xing, Beilei
2023-09-14  1:50     ` [PATCH v4 0/3] refactor single queue Tx data path Simei Su
2023-09-14  1:50       ` [PATCH v4 1/3] common/idpf: " Simei Su
2023-09-14  1:50       ` [PATCH v4 2/3] net/idpf: refine Tx queue setup Simei Su
2023-09-14  1:50       ` [PATCH v4 3/3] net/cpfl: " Simei Su
2023-09-14  1:54       ` [PATCH v4 0/3] refactor single queue Tx data path Xing, Beilei
2023-09-14  6:37       ` [PATCH v5] common/idpf: " Simei Su
2023-09-14 11:08         ` Zhang, Qi Z
