DPDK patches and discussions
* [PATCH] net/iavf: enable tx outer checksum offload on avx512
@ 2022-12-22  7:32 Zhichao Zeng
  2023-02-02  5:58 ` [PATCH v2] " Zhichao Zeng
  0 siblings, 1 reply; 8+ messages in thread
From: Zhichao Zeng @ 2022-12-22  7:32 UTC (permalink / raw)
  To: dev
  Cc: ke1.xu, qi.z.zhang, yidingx.zhou, Zhichao Zeng, Jingjing Wu,
	Beilei Xing, Bruce Richardson, Konstantin Ananyev

This patch enables outer checksum offload on the AVX512 Tx path
for tunnel packets by adding a Tx path that uses the context
descriptor and by adjusting the path selection logic.

Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>
---
 drivers/net/iavf/iavf_rxtx.c            |  21 +-
 drivers/net/iavf/iavf_rxtx.h            |  22 +-
 drivers/net/iavf/iavf_rxtx_vec_avx512.c | 324 ++++++++++++++++++++++--
 drivers/net/iavf/iavf_rxtx_vec_common.h |  47 +++-
 4 files changed, 376 insertions(+), 38 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index cf87a6beda..9c8c80b4a6 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -781,10 +781,13 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
 		else
 			insertion_cap = insertion_support->inner;
 
-		if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1)
+		if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) {
 			txq->vlan_flag = IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1;
-		else if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2)
+			PMD_INIT_LOG(DEBUG, "VLAN insertion_cap: L2TAG1");
+		} else if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2) {
 			txq->vlan_flag = IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2;
+			PMD_INIT_LOG(DEBUG, "VLAN insertion_cap: L2TAG2");
+		}
 	} else {
 		txq->vlan_flag = IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1;
 	}
@@ -3215,7 +3218,7 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 	if (check_ret >= 0 &&
 	    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
 		/* SSE and AVX2 not support offload path yet. */
-		if (check_ret == IAVF_VECTOR_PATH) {
+		if (check_ret >= IAVF_VECTOR_PATH) {
 			use_sse = true;
 			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
 			     rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
@@ -3247,11 +3250,21 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512;
 				PMD_DRV_LOG(DEBUG, "Using AVX512 Vector Tx (port %d).",
 					    dev->data->port_id);
-			} else {
+			} else if (check_ret == IAVF_VECTOR_OFFLOAD_PATH) {
 				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512_offload;
 				dev->tx_pkt_prepare = iavf_prep_pkts;
 				PMD_DRV_LOG(DEBUG, "Using AVX512 OFFLOAD Vector Tx (port %d).",
 					    dev->data->port_id);
+			} else if (check_ret == IAVF_VECTOR_CTX_PATH) {
+				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512_ctx;
+				dev->tx_pkt_prepare = iavf_prep_pkts;
+				PMD_DRV_LOG(DEBUG, "Using AVX512 CONTEXT Vector Tx (port %d).",
+					    dev->data->port_id);
+			} else {
+				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512_ctx_offload;
+				dev->tx_pkt_prepare = iavf_prep_pkts;
+				PMD_DRV_LOG(DEBUG, "Using AVX512 CONTEXT OFFLOAD Vector Tx (port %d).",
+					    dev->data->port_id);
 			}
 		}
 #endif
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index a6ad88885b..c0eb6bd9a7 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -26,8 +26,6 @@
 #define IAVF_TX_NO_VECTOR_FLAGS (				 \
 		RTE_ETH_TX_OFFLOAD_MULTI_SEGS |		 \
 		RTE_ETH_TX_OFFLOAD_TCP_TSO |		 \
-		RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM |    \
-		RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM |	\
 		RTE_ETH_TX_OFFLOAD_SECURITY)
 
 #define IAVF_TX_VECTOR_OFFLOAD (				 \
@@ -38,14 +36,29 @@
 		RTE_ETH_TX_OFFLOAD_UDP_CKSUM |		 \
 		RTE_ETH_TX_OFFLOAD_TCP_CKSUM)
 
+#define IAVF_TX_VECTOR_OFFLOAD_CTX (			\
+		RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM |	\
+		RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM)
+
 #define IAVF_RX_VECTOR_OFFLOAD (				 \
 		RTE_ETH_RX_OFFLOAD_CHECKSUM |		 \
 		RTE_ETH_RX_OFFLOAD_SCTP_CKSUM |		 \
 		RTE_ETH_RX_OFFLOAD_VLAN |		 \
 		RTE_ETH_RX_OFFLOAD_RSS_HASH)
 
+/**
+ * According to the vlan capabilities returned by the driver and FW, the vlan tci
+ * needs to be inserted to the L2TAG1 or L2TAG2 fields.
+ * If L2TAG1, it should be inserted to the L2TAG1 field in data desc.
+ * if L2TAG2, it should be inserted to the L2TAG2 field in ctx desc.
+ * Besides, tunneling parameters and other fields need be configured in ctx desc
+ * if the outer checksum offload is enabled.
+ */
+
 #define IAVF_VECTOR_PATH 0
 #define IAVF_VECTOR_OFFLOAD_PATH 1
+#define IAVF_VECTOR_CTX_PATH 2
+#define IAVF_VECTOR_CTX_OFFLOAD_PATH 3
 
 #define DEFAULT_TX_RS_THRESH     32
 #define DEFAULT_TX_FREE_THRESH   32
@@ -281,6 +294,7 @@ struct iavf_tx_queue {
 #define IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2	BIT(1)
 	uint8_t vlan_flag;
 	uint8_t tc;
+	uint8_t use_ctx:1;            /* if use the ctx desc, a packet needs two descriptors */
 };
 
 /* Offload features */
@@ -713,6 +727,10 @@ uint16_t iavf_xmit_pkts_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t iavf_xmit_pkts_vec_avx512_offload(void *tx_queue,
 					   struct rte_mbuf **tx_pkts,
 					   uint16_t nb_pkts);
+uint16_t iavf_xmit_pkts_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
+				  uint16_t nb_pkts);
+uint16_t iavf_xmit_pkts_vec_avx512_ctx_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
+				  uint16_t nb_pkts);
 int iavf_txq_vec_setup_avx512(struct iavf_tx_queue *txq);
 
 uint8_t iavf_proto_xtr_type_to_rxdid(uint8_t xtr_type);
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
index b416a716cf..5538cc0397 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
@@ -1782,13 +1782,13 @@ iavf_tx_free_bufs_avx512(struct iavf_tx_queue *txq)
 	    rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE))
 		return 0;
 
-	n = txq->rs_thresh;
+	n = txq->rs_thresh >> txq->use_ctx;
 
 	 /* first buffer to free from S/W ring is at index
 	  * tx_next_dd - (tx_rs_thresh-1)
 	  */
 	txep = (void *)txq->sw_ring;
-	txep += txq->next_dd - (n - 1);
+	txep += (txq->next_dd >> txq->use_ctx) - (n - 1);
 
 	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE && (n & 31) == 0) {
 		struct rte_mempool *mp = txep[0].mbuf->pool;
@@ -1887,14 +1887,15 @@ tx_backlog_entry_avx512(struct iavf_tx_vec_entry *txep,
 
 static __rte_always_inline void
 iavf_vtx1(volatile struct iavf_tx_desc *txdp,
-	  struct rte_mbuf *pkt, uint64_t flags, bool offload)
+	  struct rte_mbuf *pkt, uint64_t flags,
+	  bool offload, uint8_t vlan_flag)
 {
 	uint64_t high_qw =
 		(IAVF_TX_DESC_DTYPE_DATA |
 		 ((uint64_t)flags  << IAVF_TXD_QW1_CMD_SHIFT) |
 		 ((uint64_t)pkt->data_len << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT));
 	if (offload)
-		iavf_txd_enable_offload(pkt, &high_qw);
+		iavf_txd_enable_offload(pkt, &high_qw, vlan_flag);
 
 	__m128i descriptor = _mm_set_epi64x(high_qw,
 					    pkt->buf_iova + pkt->data_off);
@@ -1905,15 +1906,15 @@ iavf_vtx1(volatile struct iavf_tx_desc *txdp,
 #define IAVF_TX_OFF_MASK 0x55
 static __rte_always_inline void
 iavf_vtx(volatile struct iavf_tx_desc *txdp,
-	 struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags,
-	 bool offload)
+		struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags,
+		bool offload, uint8_t vlan_flag)
 {
 	const uint64_t hi_qw_tmpl = (IAVF_TX_DESC_DTYPE_DATA |
 			((uint64_t)flags  << IAVF_TXD_QW1_CMD_SHIFT));
 
 	/* if unaligned on 32-bit boundary, do one to align */
 	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
-		iavf_vtx1(txdp, *pkt, flags, offload);
+		iavf_vtx1(txdp, *pkt, flags, offload, vlan_flag);
 		nb_pkts--, txdp++, pkt++;
 	}
 
@@ -1923,26 +1924,24 @@ iavf_vtx(volatile struct iavf_tx_desc *txdp,
 			hi_qw_tmpl |
 			((uint64_t)pkt[3]->data_len <<
 			 IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
-		if (offload)
-			iavf_txd_enable_offload(pkt[3], &hi_qw3);
 		uint64_t hi_qw2 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[2]->data_len <<
 			 IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
-		if (offload)
-			iavf_txd_enable_offload(pkt[2], &hi_qw2);
 		uint64_t hi_qw1 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[1]->data_len <<
 			 IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
-		if (offload)
-			iavf_txd_enable_offload(pkt[1], &hi_qw1);
 		uint64_t hi_qw0 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[0]->data_len <<
 			 IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
-		if (offload)
-			iavf_txd_enable_offload(pkt[0], &hi_qw0);
+		if (offload) {
+			iavf_txd_enable_offload(pkt[3], &hi_qw3, vlan_flag);
+			iavf_txd_enable_offload(pkt[2], &hi_qw2, vlan_flag);
+			iavf_txd_enable_offload(pkt[1], &hi_qw1, vlan_flag);
+			iavf_txd_enable_offload(pkt[0], &hi_qw0, vlan_flag);
+		}
 
 		__m512i desc0_3 =
 			_mm512_set_epi64
@@ -1959,11 +1958,187 @@ iavf_vtx(volatile struct iavf_tx_desc *txdp,
 
 	/* do any last ones */
 	while (nb_pkts) {
-		iavf_vtx1(txdp, *pkt, flags, offload);
+		iavf_vtx1(txdp, *pkt, flags, offload, vlan_flag);
 		txdp++, pkt++, nb_pkts--;
 	}
 }
 
+static __rte_always_inline void
+iavf_fill_ctx_desc_tunneling_avx512(uint64_t *low_ctx_qw, struct rte_mbuf *pkt)
+{
+	if (pkt->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+		uint64_t eip_typ = IAVF_TX_CTX_DESC_EIPT_NONE;
+		uint64_t eip_len = 0;
+		uint64_t eip_noinc = 0;
+		/* Default - IP_ID is increment in each segment of LSO */
+
+		switch (pkt->ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 |
+				RTE_MBUF_F_TX_OUTER_IPV6 |
+				RTE_MBUF_F_TX_OUTER_IP_CKSUM)) {
+		case RTE_MBUF_F_TX_OUTER_IPV4:
+			eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV4_NO_CHECKSUM_OFFLOAD;
+			eip_len = pkt->outer_l3_len >> 2;
+		break;
+		case RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IP_CKSUM:
+			eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV4_CHECKSUM_OFFLOAD;
+			eip_len = pkt->outer_l3_len >> 2;
+		break;
+		case RTE_MBUF_F_TX_OUTER_IPV6:
+			eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV6;
+			eip_len = pkt->outer_l3_len >> 2;
+		break;
+		}
+
+		/* L4TUNT: L4 Tunneling Type */
+		switch (pkt->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+		case RTE_MBUF_F_TX_TUNNEL_IPIP:
+			/* for non UDP / GRE tunneling, set to 00b */
+			break;
+		case RTE_MBUF_F_TX_TUNNEL_VXLAN:
+		case RTE_MBUF_F_TX_TUNNEL_VXLAN_GPE:
+		case RTE_MBUF_F_TX_TUNNEL_GTP:
+		case RTE_MBUF_F_TX_TUNNEL_GENEVE:
+			eip_typ |= IAVF_TXD_CTX_UDP_TUNNELING;
+			break;
+		case RTE_MBUF_F_TX_TUNNEL_GRE:
+			eip_typ |= IAVF_TXD_CTX_GRE_TUNNELING;
+			break;
+		default:
+			PMD_TX_LOG(ERR, "Tunnel type not supported");
+			return;
+		}
+
+		/* L4TUNLEN: L4 Tunneling Length, in Words
+		 *
+		 * We depend on app to set rte_mbuf.l2_len correctly.
+		 * For IP in GRE it should be set to the length of the GRE
+		 * header;
+		 * For MAC in GRE or MAC in UDP it should be set to the length
+		 * of the GRE or UDP headers plus the inner MAC up to including
+		 * its last Ethertype.
+		 * If MPLS labels exists, it should include them as well.
+		 */
+		eip_typ |= (pkt->l2_len >> 1) << IAVF_TXD_CTX_QW0_NATLEN_SHIFT;
+
+		/**
+		 * Calculate the tunneling UDP checksum.
+		 * Shall be set only if L4TUNT = 01b and EIPT is not zero
+		 */
+		if ((eip_typ & (IAVF_TX_CTX_EXT_IP_IPV4 |
+					IAVF_TX_CTX_EXT_IP_IPV6 |
+					IAVF_TX_CTX_EXT_IP_IPV4_NO_CSUM)) &&
+				(eip_typ & IAVF_TXD_CTX_UDP_TUNNELING) &&
+				(pkt->ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM))
+			eip_typ |= IAVF_TXD_CTX_QW0_L4T_CS_MASK;
+
+		*low_ctx_qw = eip_typ << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPT_SHIFT |
+			eip_len << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPLEN_SHIFT |
+			eip_noinc << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIP_NOINC_SHIFT;
+
+	} else {
+		*low_ctx_qw = 0;
+	}
+}
+
+static __rte_always_inline void
+ctx_vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt,
+		uint64_t flags, bool offload, uint8_t vlan_flag)
+{
+	uint64_t high_ctx_qw = IAVF_TX_DESC_DTYPE_CONTEXT;
+	uint64_t low_ctx_qw = 0;
+
+	if (((pkt->ol_flags & RTE_MBUF_F_TX_VLAN) || offload)) {
+		if (offload)
+			iavf_fill_ctx_desc_tunneling_avx512(&low_ctx_qw, pkt);
+		if ((pkt->ol_flags & RTE_MBUF_F_TX_VLAN) ||
+				(vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2)) {
+			high_ctx_qw |= IAVF_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+			low_ctx_qw |= (uint64_t)pkt->vlan_tci << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
+		}
+	}
+	uint64_t high_data_qw = (IAVF_TX_DESC_DTYPE_DATA |
+				((uint64_t)flags  << IAVF_TXD_QW1_CMD_SHIFT) |
+				((uint64_t)pkt->data_len << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT));
+	if (offload)
+		iavf_txd_enable_offload(pkt, &high_data_qw, vlan_flag);
+
+	__m256i ctx_data_desc = _mm256_set_epi64x(high_data_qw, pkt->buf_iova + pkt->data_off,
+							high_ctx_qw, low_ctx_qw);
+
+	_mm256_storeu_si256((__m256i *)txdp, ctx_data_desc);
+}
+
+static __rte_always_inline void
+ctx_vtx(volatile struct iavf_tx_desc *txdp,
+		struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags,
+		bool offload, uint8_t vlan_flag)
+{
+	uint64_t hi_data_qw_tmpl = (IAVF_TX_DESC_DTYPE_DATA |
+					((uint64_t)flags  << IAVF_TXD_QW1_CMD_SHIFT));
+
+	/* if unaligned on 32-bit boundary, do one to align */
+	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
+		ctx_vtx1(txdp, *pkt, flags, offload, vlan_flag);
+		nb_pkts--, txdp++, pkt++;
+	}
+
+	for (; nb_pkts > 1; txdp += 4, pkt += 2, nb_pkts -= 2) {
+		uint64_t hi_ctx_qw1 = IAVF_TX_DESC_DTYPE_CONTEXT;
+		uint64_t hi_ctx_qw0 = IAVF_TX_DESC_DTYPE_CONTEXT;
+		uint64_t low_ctx_qw1 = 0;
+		uint64_t low_ctx_qw0 = 0;
+		uint64_t hi_data_qw1 = 0;
+		uint64_t hi_data_qw0 = 0;
+
+		hi_data_qw1 = hi_data_qw_tmpl |
+				((uint64_t)pkt[1]->data_len <<
+					IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
+		hi_data_qw0 = hi_data_qw_tmpl |
+				((uint64_t)pkt[0]->data_len <<
+					IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
+
+		if (pkt[1]->ol_flags & RTE_MBUF_F_TX_VLAN) {
+			if (vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
+				hi_ctx_qw1 |=
+					IAVF_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+				low_ctx_qw1 |=
+					(uint64_t)pkt[1]->vlan_tci << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
+			} else {
+				hi_data_qw1 |=
+					(uint64_t)pkt[1]->vlan_tci << IAVF_TXD_QW1_L2TAG1_SHIFT;
+			}
+		}
+
+		if (pkt[0]->ol_flags & RTE_MBUF_F_TX_VLAN) {
+			if (vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
+				hi_ctx_qw0 |=
+					IAVF_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+				low_ctx_qw0 |=
+					(uint64_t)pkt[0]->vlan_tci << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
+			} else {
+				hi_data_qw0 |=
+					(uint64_t)pkt[0]->vlan_tci << IAVF_TXD_QW1_L2TAG1_SHIFT;
+			}
+		}
+
+		if (offload) {
+			iavf_txd_enable_offload(pkt[1], &hi_data_qw1, vlan_flag);
+			iavf_txd_enable_offload(pkt[0], &hi_data_qw0, vlan_flag);
+		}
+
+		__m512i desc0_3 =
+				_mm512_set_epi64
+						(hi_data_qw1, pkt[1]->buf_iova + pkt[1]->data_off,
+						hi_ctx_qw1, low_ctx_qw1,
+						hi_data_qw0, pkt[0]->buf_iova + pkt[0]->data_off,
+						hi_ctx_qw0, low_ctx_qw0);
+		_mm512_storeu_si512((void *)txdp, desc0_3);
+	}
+
+	if (nb_pkts)
+		ctx_vtx1(txdp, *pkt, flags, offload, vlan_flag);
+}
+
 static __rte_always_inline uint16_t
 iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 				 uint16_t nb_pkts, bool offload)
@@ -1994,11 +2169,11 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 	if (nb_commit >= n) {
 		tx_backlog_entry_avx512(txep, tx_pkts, n);
 
-		iavf_vtx(txdp, tx_pkts, n - 1, flags, offload);
+		iavf_vtx(txdp, tx_pkts, n - 1, flags, offload, txq->vlan_flag);
 		tx_pkts += (n - 1);
 		txdp += (n - 1);
 
-		iavf_vtx1(txdp, *tx_pkts++, rs, offload);
+		iavf_vtx1(txdp, *tx_pkts++, rs, offload, txq->vlan_flag);
 
 		nb_commit = (uint16_t)(nb_commit - n);
 
@@ -2013,7 +2188,7 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	tx_backlog_entry_avx512(txep, tx_pkts, nb_commit);
 
-	iavf_vtx(txdp, tx_pkts, nb_commit, flags, offload);
+	iavf_vtx(txdp, tx_pkts, nb_commit, flags, offload, txq->vlan_flag);
 
 	tx_id = (uint16_t)(tx_id + nb_commit);
 	if (tx_id > txq->next_rs) {
@@ -2031,6 +2206,73 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_pkts;
 }
 
+static __rte_always_inline uint16_t
+iavf_xmit_fixed_burst_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
+				 uint16_t nb_pkts, bool offload)
+{
+	struct iavf_tx_queue *txq = (struct iavf_tx_queue *)tx_queue;
+	volatile struct iavf_tx_desc *txdp;
+	struct iavf_tx_vec_entry *txep;
+	uint16_t n, nb_commit, nb_mbuf, tx_id;
+	/* bit2 is reserved and must be set to 1 according to Spec */
+	uint64_t flags = IAVF_TX_DESC_CMD_EOP | IAVF_TX_DESC_CMD_ICRC;
+	uint64_t rs = IAVF_TX_DESC_CMD_RS | flags;
+
+	if (txq->nb_free < txq->free_thresh)
+		iavf_tx_free_bufs_avx512(txq);
+
+	nb_commit = (uint16_t)RTE_MIN(txq->nb_free, nb_pkts << 1);
+	nb_commit &= 0xFFFE;
+	if (unlikely(nb_commit == 0))
+		return 0;
+
+	nb_pkts = nb_commit >> 1;
+	tx_id = txq->tx_tail;
+	txdp = &txq->tx_ring[tx_id];
+	txep = (void *)txq->sw_ring;
+	txep += (tx_id >> 1);
+
+	txq->nb_free = (uint16_t)(txq->nb_free - nb_commit);
+	n = (uint16_t)(txq->nb_tx_desc - tx_id);
+
+	if (n != 0 && nb_commit >= n) {
+		nb_mbuf = n >> 1;
+		tx_backlog_entry_avx512(txep, tx_pkts, nb_mbuf);
+
+		ctx_vtx(txdp, tx_pkts, nb_mbuf - 1, flags, offload, txq->vlan_flag);
+		tx_pkts += (nb_mbuf - 1);
+		txdp += (n - 2);
+		ctx_vtx1(txdp, *tx_pkts++, rs, offload, txq->vlan_flag);
+
+		nb_commit = (uint16_t)(nb_commit - n);
+
+		txq->next_rs = (uint16_t)(txq->rs_thresh - 1);
+		tx_id = 0;
+		/* avoid reach the end of ring */
+		txdp = txq->tx_ring;
+		txep = (void *)txq->sw_ring;
+	}
+
+	nb_mbuf = nb_commit >> 1;
+	tx_backlog_entry_avx512(txep, tx_pkts, nb_mbuf);
+
+	ctx_vtx(txdp, tx_pkts, nb_mbuf, flags, offload, txq->vlan_flag);
+	tx_id = (uint16_t)(tx_id + nb_commit);
+
+	if (tx_id > txq->next_rs) {
+		txq->tx_ring[txq->next_rs].cmd_type_offset_bsz |=
+			rte_cpu_to_le_64(((uint64_t)IAVF_TX_DESC_CMD_RS) <<
+					 IAVF_TXD_QW1_CMD_SHIFT);
+		txq->next_rs =
+			(uint16_t)(txq->next_rs + txq->rs_thresh);
+	}
+
+	txq->tx_tail = tx_id;
+
+	IAVF_PCI_REG_WC_WRITE(txq->qtx_tail, txq->tx_tail);
+	return nb_pkts;
+}
+
 static __rte_always_inline uint16_t
 iavf_xmit_pkts_vec_avx512_cmn(void *tx_queue, struct rte_mbuf **tx_pkts,
 			      uint16_t nb_pkts, bool offload)
@@ -2071,9 +2313,11 @@ iavf_tx_queue_release_mbufs_avx512(struct iavf_tx_queue *txq)
 	if (!txq->sw_ring || txq->nb_free == max_desc)
 		return;
 
-	i = txq->next_dd - txq->rs_thresh + 1;
+	i = (txq->next_dd >> txq->use_ctx) + 1 -
+			(txq->rs_thresh >> txq->use_ctx);
+
 	if (txq->tx_tail < i) {
-		for (; i < txq->nb_tx_desc; i++) {
+		for (; i < (unsigned int)(txq->nb_tx_desc >> txq->use_ctx); i++) {
 			rte_pktmbuf_free_seg(swr[i].mbuf);
 			swr[i].mbuf = NULL;
 		}
@@ -2094,3 +2338,41 @@ iavf_xmit_pkts_vec_avx512_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
 {
 	return iavf_xmit_pkts_vec_avx512_cmn(tx_queue, tx_pkts, nb_pkts, true);
 }
+
+static __rte_always_inline uint16_t
+iavf_xmit_pkts_vec_avx512_ctx_cmn(void *tx_queue, struct rte_mbuf **tx_pkts,
+				  uint16_t nb_pkts, bool offload)
+{
+	uint16_t nb_tx = 0;
+	struct iavf_tx_queue *txq = (struct iavf_tx_queue *)tx_queue;
+
+	while (nb_pkts) {
+		uint16_t ret, num;
+
+		/* cross rs_thresh boundary is not allowed */
+		num = (uint16_t)RTE_MIN(nb_pkts << 1, txq->rs_thresh);
+		num = num >> 1;
+		ret = iavf_xmit_fixed_burst_vec_avx512_ctx(tx_queue, &tx_pkts[nb_tx],
+						       num, offload);
+		nb_tx += ret;
+		nb_pkts -= ret;
+		if (ret < num)
+			break;
+	}
+
+	return nb_tx;
+}
+
+uint16_t
+iavf_xmit_pkts_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
+				  uint16_t nb_pkts)
+{
+	return iavf_xmit_pkts_vec_avx512_ctx_cmn(tx_queue, tx_pkts, nb_pkts, false);
+}
+
+uint16_t
+iavf_xmit_pkts_vec_avx512_ctx_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
+				  uint16_t nb_pkts)
+{
+	return iavf_xmit_pkts_vec_avx512_ctx_cmn(tx_queue, tx_pkts, nb_pkts, true);
+}
diff --git a/drivers/net/iavf/iavf_rxtx_vec_common.h b/drivers/net/iavf/iavf_rxtx_vec_common.h
index a59cb2ceee..9568ce2dd0 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/iavf/iavf_rxtx_vec_common.h
@@ -253,10 +253,28 @@ iavf_tx_vec_queue_default(struct iavf_tx_queue *txq)
 	if (txq->offloads & IAVF_TX_NO_VECTOR_FLAGS)
 		return -1;
 
-	if (txq->offloads & IAVF_TX_VECTOR_OFFLOAD)
-		return IAVF_VECTOR_OFFLOAD_PATH;
-
-	return IAVF_VECTOR_PATH;
+	/**
+	 * Vlan tci needs to be inserted via ctx desc, if the vlan_flag is L2TAG2.
+	 * Tunneling parameters and other fields need be configured in ctx desc
+	 * if the outer checksum offload is enabled.
+	 */
+	if (txq->vlan_flag == IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
+		txq->use_ctx = 1;
+		if (txq->offloads & (IAVF_TX_VECTOR_OFFLOAD |
+				IAVF_TX_VECTOR_OFFLOAD_CTX))
+			return IAVF_VECTOR_CTX_OFFLOAD_PATH;
+		else
+			return IAVF_VECTOR_CTX_PATH;
+	} else {
+		if (txq->offloads & IAVF_TX_VECTOR_OFFLOAD_CTX) {
+			txq->use_ctx = 1;
+			return IAVF_VECTOR_CTX_OFFLOAD_PATH;
+		} else if (txq->offloads & IAVF_TX_VECTOR_OFFLOAD) {
+			return IAVF_VECTOR_OFFLOAD_PATH;
+		} else {
+			return IAVF_VECTOR_PATH;
+		}
+	}
 }
 
 static inline int
@@ -313,7 +331,7 @@ iavf_tx_vec_dev_check_default(struct rte_eth_dev *dev)
 
 static __rte_always_inline void
 iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
-			uint64_t *txd_hi)
+			uint64_t *txd_hi, uint8_t vlan_flag)
 {
 #if defined(IAVF_TX_CSUM_OFFLOAD) || defined(IAVF_TX_VLAN_QINQ_OFFLOAD)
 	uint64_t ol_flags = tx_pkt->ol_flags;
@@ -325,14 +343,20 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 
 #ifdef IAVF_TX_CSUM_OFFLOAD
 	/* Set MACLEN */
-	td_offset |= (tx_pkt->l2_len >> 1) <<
-		     IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
+	if (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)
+		td_offset |= (tx_pkt->outer_l2_len >> 1)
+			<< IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
+	else
+		td_offset |= (tx_pkt->l2_len >> 1)
+			<< IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
 
 	/* Enable L3 checksum offloads */
 	if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
-		td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
-		td_offset |= (tx_pkt->l3_len >> 2) <<
-			     IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+		if (ol_flags & RTE_MBUF_F_TX_IPV4) {
+			td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
+			td_offset |= (tx_pkt->l3_len >> 2) <<
+				     IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+		}
 	} else if (ol_flags & RTE_MBUF_F_TX_IPV4) {
 		td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4;
 		td_offset |= (tx_pkt->l3_len >> 2) <<
@@ -368,7 +392,8 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 #endif
 
 #ifdef IAVF_TX_VLAN_QINQ_OFFLOAD
-	if (ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
+	if ((ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) &&
+		(vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1)) {
 		td_cmd |= IAVF_TX_DESC_CMD_IL2TAG1;
 		*txd_hi |= ((uint64_t)tx_pkt->vlan_tci <<
 			    IAVF_TXD_QW1_L2TAG1_SHIFT);
-- 
2.25.1
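
A quick restatement, for readers skimming the diff: the Tx path selection
this patch adds can be summarised as the helper below. It is only an
illustrative paraphrase of iavf_tx_vec_queue_default(), using the
IAVF_VECTOR_* and IAVF_TX_* definitions from the diff above (the helper
name is made up); it is not additional driver code.

static int
iavf_tx_path_sketch(struct iavf_tx_queue *txq)
{
	/* Offloads the vector paths cannot handle force the scalar path. */
	if (txq->offloads & IAVF_TX_NO_VECTOR_FLAGS)
		return -1;

	/* A context descriptor is required when the VLAN tag must be
	 * written to L2TAG2, or when outer (tunnel) checksum offload
	 * is requested.
	 */
	if (txq->vlan_flag == IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2 ||
	    (txq->offloads & IAVF_TX_VECTOR_OFFLOAD_CTX)) {
		txq->use_ctx = 1;	/* two descriptors per packet */
		if (txq->offloads & (IAVF_TX_VECTOR_OFFLOAD |
				IAVF_TX_VECTOR_OFFLOAD_CTX))
			return IAVF_VECTOR_CTX_OFFLOAD_PATH;
		return IAVF_VECTOR_CTX_PATH;
	}

	/* No context descriptor needed: plain or offload vector path. */
	if (txq->offloads & IAVF_TX_VECTOR_OFFLOAD)
		return IAVF_VECTOR_OFFLOAD_PATH;
	return IAVF_VECTOR_PATH;
}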



* [PATCH v2] net/iavf: enable tx outer checksum offload on avx512
  2022-12-22  7:32 [PATCH] net/iavf: enable tx outer checksum offload on avx512 Zhichao Zeng
@ 2023-02-02  5:58 ` Zhichao Zeng
  2023-02-02  6:24   ` Xu, Ke1
  2023-02-17  1:49   ` [PATCH v3] net/iavf: enable Tx " Zhichao Zeng
  0 siblings, 2 replies; 8+ messages in thread
From: Zhichao Zeng @ 2023-02-02  5:58 UTC (permalink / raw)
  To: dev
  Cc: qi.z.zhang, yidingx.zhou, ke1.xu, Zhichao Zeng, Jingjing Wu,
	Beilei Xing, Bruce Richardson, Konstantin Ananyev

This patch enables outer checksum offload on the AVX512 Tx path
for tunnel packets by adding a Tx path that uses the context
descriptor and by adjusting the path selection logic.

Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>

---
v2: fix path select logic
---
 drivers/net/iavf/iavf_rxtx.c            |  21 +-
 drivers/net/iavf/iavf_rxtx.h            |  22 +-
 drivers/net/iavf/iavf_rxtx_vec_avx512.c | 324 ++++++++++++++++++++++--
 drivers/net/iavf/iavf_rxtx_vec_common.h |  47 +++-
 4 files changed, 376 insertions(+), 38 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 3d9224b38d..3d60978901 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -781,10 +781,13 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
 		else
 			insertion_cap = insertion_support->inner;
 
-		if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1)
+		if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) {
 			txq->vlan_flag = IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1;
-		else if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2)
+			PMD_INIT_LOG(DEBUG, "VLAN insertion_cap: L2TAG1");
+		} else if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2) {
 			txq->vlan_flag = IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2;
+			PMD_INIT_LOG(DEBUG, "VLAN insertion_cap: L2TAG2");
+		}
 	} else {
 		txq->vlan_flag = IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1;
 	}
@@ -3229,7 +3232,7 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 	if (check_ret >= 0 &&
 	    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
 		/* SSE and AVX2 not support offload path yet. */
-		if (check_ret == IAVF_VECTOR_PATH) {
+		if (check_ret == IAVF_VECTOR_PATH || check_ret == IAVF_VECTOR_CTX_PATH) {
 			use_sse = true;
 			if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
 			     rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
@@ -3261,11 +3264,21 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512;
 				PMD_DRV_LOG(DEBUG, "Using AVX512 Vector Tx (port %d).",
 					    dev->data->port_id);
-			} else {
+			} else if (check_ret == IAVF_VECTOR_OFFLOAD_PATH) {
 				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512_offload;
 				dev->tx_pkt_prepare = iavf_prep_pkts;
 				PMD_DRV_LOG(DEBUG, "Using AVX512 OFFLOAD Vector Tx (port %d).",
 					    dev->data->port_id);
+			} else if (check_ret == IAVF_VECTOR_CTX_PATH) {
+				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512_ctx;
+				dev->tx_pkt_prepare = iavf_prep_pkts;
+				PMD_DRV_LOG(DEBUG, "Using AVX512 CONTEXT Vector Tx (port %d).",
+					    dev->data->port_id);
+			} else {
+				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512_ctx_offload;
+				dev->tx_pkt_prepare = iavf_prep_pkts;
+				PMD_DRV_LOG(DEBUG, "Using AVX512 CONTEXT OFFLOAD Vector Tx (port %d).",
+					    dev->data->port_id);
 			}
 		}
 #endif
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index a6ad88885b..3e030d1ffc 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -26,8 +26,6 @@
 #define IAVF_TX_NO_VECTOR_FLAGS (				 \
 		RTE_ETH_TX_OFFLOAD_MULTI_SEGS |		 \
 		RTE_ETH_TX_OFFLOAD_TCP_TSO |		 \
-		RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM |    \
-		RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM |	\
 		RTE_ETH_TX_OFFLOAD_SECURITY)
 
 #define IAVF_TX_VECTOR_OFFLOAD (				 \
@@ -38,14 +36,29 @@
 		RTE_ETH_TX_OFFLOAD_UDP_CKSUM |		 \
 		RTE_ETH_TX_OFFLOAD_TCP_CKSUM)
 
+#define IAVF_TX_VECTOR_OFFLOAD_CTX (			\
+		RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM |	\
+		RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM)
+
 #define IAVF_RX_VECTOR_OFFLOAD (				 \
 		RTE_ETH_RX_OFFLOAD_CHECKSUM |		 \
 		RTE_ETH_RX_OFFLOAD_SCTP_CKSUM |		 \
 		RTE_ETH_RX_OFFLOAD_VLAN |		 \
 		RTE_ETH_RX_OFFLOAD_RSS_HASH)
 
+/**
+ * According to the vlan capabilities returned by the driver and FW, the vlan tci
+ * needs to be inserted to the L2TAG1 or L2TAG2 fields.
+ * If L2TAG1, it should be inserted to the L2TAG1 field in data desc.
+ * If L2TAG2, it should be inserted to the L2TAG2 field in ctx desc.
+ * Besides, tunneling parameters and other fields need be configured in ctx desc
+ * if the outer checksum offload is enabled.
+ */
+
 #define IAVF_VECTOR_PATH 0
 #define IAVF_VECTOR_OFFLOAD_PATH 1
+#define IAVF_VECTOR_CTX_PATH 2
+#define IAVF_VECTOR_CTX_OFFLOAD_PATH 3
 
 #define DEFAULT_TX_RS_THRESH     32
 #define DEFAULT_TX_FREE_THRESH   32
@@ -281,6 +294,7 @@ struct iavf_tx_queue {
 #define IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2	BIT(1)
 	uint8_t vlan_flag;
 	uint8_t tc;
+	uint8_t use_ctx:1;            /* if use the ctx desc, a packet needs two descriptors */
 };
 
 /* Offload features */
@@ -713,6 +727,10 @@ uint16_t iavf_xmit_pkts_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t iavf_xmit_pkts_vec_avx512_offload(void *tx_queue,
 					   struct rte_mbuf **tx_pkts,
 					   uint16_t nb_pkts);
+uint16_t iavf_xmit_pkts_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
+				  uint16_t nb_pkts);
+uint16_t iavf_xmit_pkts_vec_avx512_ctx_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
+				  uint16_t nb_pkts);
 int iavf_txq_vec_setup_avx512(struct iavf_tx_queue *txq);
 
 uint8_t iavf_proto_xtr_type_to_rxdid(uint8_t xtr_type);
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
index b416a716cf..5538cc0397 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
@@ -1782,13 +1782,13 @@ iavf_tx_free_bufs_avx512(struct iavf_tx_queue *txq)
 	    rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE))
 		return 0;
 
-	n = txq->rs_thresh;
+	n = txq->rs_thresh >> txq->use_ctx;
 
 	 /* first buffer to free from S/W ring is at index
 	  * tx_next_dd - (tx_rs_thresh-1)
 	  */
 	txep = (void *)txq->sw_ring;
-	txep += txq->next_dd - (n - 1);
+	txep += (txq->next_dd >> txq->use_ctx) - (n - 1);
 
 	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE && (n & 31) == 0) {
 		struct rte_mempool *mp = txep[0].mbuf->pool;
@@ -1887,14 +1887,15 @@ tx_backlog_entry_avx512(struct iavf_tx_vec_entry *txep,
 
 static __rte_always_inline void
 iavf_vtx1(volatile struct iavf_tx_desc *txdp,
-	  struct rte_mbuf *pkt, uint64_t flags, bool offload)
+	  struct rte_mbuf *pkt, uint64_t flags,
+	  bool offload, uint8_t vlan_flag)
 {
 	uint64_t high_qw =
 		(IAVF_TX_DESC_DTYPE_DATA |
 		 ((uint64_t)flags  << IAVF_TXD_QW1_CMD_SHIFT) |
 		 ((uint64_t)pkt->data_len << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT));
 	if (offload)
-		iavf_txd_enable_offload(pkt, &high_qw);
+		iavf_txd_enable_offload(pkt, &high_qw, vlan_flag);
 
 	__m128i descriptor = _mm_set_epi64x(high_qw,
 					    pkt->buf_iova + pkt->data_off);
@@ -1905,15 +1906,15 @@ iavf_vtx1(volatile struct iavf_tx_desc *txdp,
 #define IAVF_TX_OFF_MASK 0x55
 static __rte_always_inline void
 iavf_vtx(volatile struct iavf_tx_desc *txdp,
-	 struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags,
-	 bool offload)
+		struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags,
+		bool offload, uint8_t vlan_flag)
 {
 	const uint64_t hi_qw_tmpl = (IAVF_TX_DESC_DTYPE_DATA |
 			((uint64_t)flags  << IAVF_TXD_QW1_CMD_SHIFT));
 
 	/* if unaligned on 32-bit boundary, do one to align */
 	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
-		iavf_vtx1(txdp, *pkt, flags, offload);
+		iavf_vtx1(txdp, *pkt, flags, offload, vlan_flag);
 		nb_pkts--, txdp++, pkt++;
 	}
 
@@ -1923,26 +1924,24 @@ iavf_vtx(volatile struct iavf_tx_desc *txdp,
 			hi_qw_tmpl |
 			((uint64_t)pkt[3]->data_len <<
 			 IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
-		if (offload)
-			iavf_txd_enable_offload(pkt[3], &hi_qw3);
 		uint64_t hi_qw2 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[2]->data_len <<
 			 IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
-		if (offload)
-			iavf_txd_enable_offload(pkt[2], &hi_qw2);
 		uint64_t hi_qw1 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[1]->data_len <<
 			 IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
-		if (offload)
-			iavf_txd_enable_offload(pkt[1], &hi_qw1);
 		uint64_t hi_qw0 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[0]->data_len <<
 			 IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
-		if (offload)
-			iavf_txd_enable_offload(pkt[0], &hi_qw0);
+		if (offload) {
+			iavf_txd_enable_offload(pkt[3], &hi_qw3, vlan_flag);
+			iavf_txd_enable_offload(pkt[2], &hi_qw2, vlan_flag);
+			iavf_txd_enable_offload(pkt[1], &hi_qw1, vlan_flag);
+			iavf_txd_enable_offload(pkt[0], &hi_qw0, vlan_flag);
+		}
 
 		__m512i desc0_3 =
 			_mm512_set_epi64
@@ -1959,11 +1958,187 @@ iavf_vtx(volatile struct iavf_tx_desc *txdp,
 
 	/* do any last ones */
 	while (nb_pkts) {
-		iavf_vtx1(txdp, *pkt, flags, offload);
+		iavf_vtx1(txdp, *pkt, flags, offload, vlan_flag);
 		txdp++, pkt++, nb_pkts--;
 	}
 }
 
+static __rte_always_inline void
+iavf_fill_ctx_desc_tunneling_avx512(uint64_t *low_ctx_qw, struct rte_mbuf *pkt)
+{
+	if (pkt->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+		uint64_t eip_typ = IAVF_TX_CTX_DESC_EIPT_NONE;
+		uint64_t eip_len = 0;
+		uint64_t eip_noinc = 0;
+		/* Default - IP_ID is increment in each segment of LSO */
+
+		switch (pkt->ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 |
+				RTE_MBUF_F_TX_OUTER_IPV6 |
+				RTE_MBUF_F_TX_OUTER_IP_CKSUM)) {
+		case RTE_MBUF_F_TX_OUTER_IPV4:
+			eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV4_NO_CHECKSUM_OFFLOAD;
+			eip_len = pkt->outer_l3_len >> 2;
+		break;
+		case RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IP_CKSUM:
+			eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV4_CHECKSUM_OFFLOAD;
+			eip_len = pkt->outer_l3_len >> 2;
+		break;
+		case RTE_MBUF_F_TX_OUTER_IPV6:
+			eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV6;
+			eip_len = pkt->outer_l3_len >> 2;
+		break;
+		}
+
+		/* L4TUNT: L4 Tunneling Type */
+		switch (pkt->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+		case RTE_MBUF_F_TX_TUNNEL_IPIP:
+			/* for non UDP / GRE tunneling, set to 00b */
+			break;
+		case RTE_MBUF_F_TX_TUNNEL_VXLAN:
+		case RTE_MBUF_F_TX_TUNNEL_VXLAN_GPE:
+		case RTE_MBUF_F_TX_TUNNEL_GTP:
+		case RTE_MBUF_F_TX_TUNNEL_GENEVE:
+			eip_typ |= IAVF_TXD_CTX_UDP_TUNNELING;
+			break;
+		case RTE_MBUF_F_TX_TUNNEL_GRE:
+			eip_typ |= IAVF_TXD_CTX_GRE_TUNNELING;
+			break;
+		default:
+			PMD_TX_LOG(ERR, "Tunnel type not supported");
+			return;
+		}
+
+		/* L4TUNLEN: L4 Tunneling Length, in Words
+		 *
+		 * We depend on app to set rte_mbuf.l2_len correctly.
+		 * For IP in GRE it should be set to the length of the GRE
+		 * header;
+		 * For MAC in GRE or MAC in UDP it should be set to the length
+		 * of the GRE or UDP headers plus the inner MAC up to including
+		 * its last Ethertype.
+		 * If MPLS labels exists, it should include them as well.
+		 */
+		eip_typ |= (pkt->l2_len >> 1) << IAVF_TXD_CTX_QW0_NATLEN_SHIFT;
+
+		/**
+		 * Calculate the tunneling UDP checksum.
+		 * Shall be set only if L4TUNT = 01b and EIPT is not zero
+		 */
+		if ((eip_typ & (IAVF_TX_CTX_EXT_IP_IPV4 |
+					IAVF_TX_CTX_EXT_IP_IPV6 |
+					IAVF_TX_CTX_EXT_IP_IPV4_NO_CSUM)) &&
+				(eip_typ & IAVF_TXD_CTX_UDP_TUNNELING) &&
+				(pkt->ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM))
+			eip_typ |= IAVF_TXD_CTX_QW0_L4T_CS_MASK;
+
+		*low_ctx_qw = eip_typ << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPT_SHIFT |
+			eip_len << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPLEN_SHIFT |
+			eip_noinc << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIP_NOINC_SHIFT;
+
+	} else {
+		*low_ctx_qw = 0;
+	}
+}
+
+static __rte_always_inline void
+ctx_vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt,
+		uint64_t flags, bool offload, uint8_t vlan_flag)
+{
+	uint64_t high_ctx_qw = IAVF_TX_DESC_DTYPE_CONTEXT;
+	uint64_t low_ctx_qw = 0;
+
+	if (((pkt->ol_flags & RTE_MBUF_F_TX_VLAN) || offload)) {
+		if (offload)
+			iavf_fill_ctx_desc_tunneling_avx512(&low_ctx_qw, pkt);
+		if ((pkt->ol_flags & RTE_MBUF_F_TX_VLAN) ||
+				(vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2)) {
+			high_ctx_qw |= IAVF_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+			low_ctx_qw |= (uint64_t)pkt->vlan_tci << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
+		}
+	}
+	uint64_t high_data_qw = (IAVF_TX_DESC_DTYPE_DATA |
+				((uint64_t)flags  << IAVF_TXD_QW1_CMD_SHIFT) |
+				((uint64_t)pkt->data_len << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT));
+	if (offload)
+		iavf_txd_enable_offload(pkt, &high_data_qw, vlan_flag);
+
+	__m256i ctx_data_desc = _mm256_set_epi64x(high_data_qw, pkt->buf_iova + pkt->data_off,
+							high_ctx_qw, low_ctx_qw);
+
+	_mm256_storeu_si256((__m256i *)txdp, ctx_data_desc);
+}
+
+static __rte_always_inline void
+ctx_vtx(volatile struct iavf_tx_desc *txdp,
+		struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags,
+		bool offload, uint8_t vlan_flag)
+{
+	uint64_t hi_data_qw_tmpl = (IAVF_TX_DESC_DTYPE_DATA |
+					((uint64_t)flags  << IAVF_TXD_QW1_CMD_SHIFT));
+
+	/* if unaligned on 32-bit boundary, do one to align */
+	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
+		ctx_vtx1(txdp, *pkt, flags, offload, vlan_flag);
+		nb_pkts--, txdp++, pkt++;
+	}
+
+	for (; nb_pkts > 1; txdp += 4, pkt += 2, nb_pkts -= 2) {
+		uint64_t hi_ctx_qw1 = IAVF_TX_DESC_DTYPE_CONTEXT;
+		uint64_t hi_ctx_qw0 = IAVF_TX_DESC_DTYPE_CONTEXT;
+		uint64_t low_ctx_qw1 = 0;
+		uint64_t low_ctx_qw0 = 0;
+		uint64_t hi_data_qw1 = 0;
+		uint64_t hi_data_qw0 = 0;
+
+		hi_data_qw1 = hi_data_qw_tmpl |
+				((uint64_t)pkt[1]->data_len <<
+					IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
+		hi_data_qw0 = hi_data_qw_tmpl |
+				((uint64_t)pkt[0]->data_len <<
+					IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
+
+		if (pkt[1]->ol_flags & RTE_MBUF_F_TX_VLAN) {
+			if (vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
+				hi_ctx_qw1 |=
+					IAVF_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+				low_ctx_qw1 |=
+					(uint64_t)pkt[1]->vlan_tci << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
+			} else {
+				hi_data_qw1 |=
+					(uint64_t)pkt[1]->vlan_tci << IAVF_TXD_QW1_L2TAG1_SHIFT;
+			}
+		}
+
+		if (pkt[0]->ol_flags & RTE_MBUF_F_TX_VLAN) {
+			if (vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
+				hi_ctx_qw0 |=
+					IAVF_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+				low_ctx_qw0 |=
+					(uint64_t)pkt[0]->vlan_tci << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
+			} else {
+				hi_data_qw0 |=
+					(uint64_t)pkt[0]->vlan_tci << IAVF_TXD_QW1_L2TAG1_SHIFT;
+			}
+		}
+
+		if (offload) {
+			iavf_txd_enable_offload(pkt[1], &hi_data_qw1, vlan_flag);
+			iavf_txd_enable_offload(pkt[0], &hi_data_qw0, vlan_flag);
+		}
+
+		__m512i desc0_3 =
+				_mm512_set_epi64
+						(hi_data_qw1, pkt[1]->buf_iova + pkt[1]->data_off,
+						hi_ctx_qw1, low_ctx_qw1,
+						hi_data_qw0, pkt[0]->buf_iova + pkt[0]->data_off,
+						hi_ctx_qw0, low_ctx_qw0);
+		_mm512_storeu_si512((void *)txdp, desc0_3);
+	}
+
+	if (nb_pkts)
+		ctx_vtx1(txdp, *pkt, flags, offload, vlan_flag);
+}
+
 static __rte_always_inline uint16_t
 iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 				 uint16_t nb_pkts, bool offload)
@@ -1994,11 +2169,11 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 	if (nb_commit >= n) {
 		tx_backlog_entry_avx512(txep, tx_pkts, n);
 
-		iavf_vtx(txdp, tx_pkts, n - 1, flags, offload);
+		iavf_vtx(txdp, tx_pkts, n - 1, flags, offload, txq->vlan_flag);
 		tx_pkts += (n - 1);
 		txdp += (n - 1);
 
-		iavf_vtx1(txdp, *tx_pkts++, rs, offload);
+		iavf_vtx1(txdp, *tx_pkts++, rs, offload, txq->vlan_flag);
 
 		nb_commit = (uint16_t)(nb_commit - n);
 
@@ -2013,7 +2188,7 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	tx_backlog_entry_avx512(txep, tx_pkts, nb_commit);
 
-	iavf_vtx(txdp, tx_pkts, nb_commit, flags, offload);
+	iavf_vtx(txdp, tx_pkts, nb_commit, flags, offload, txq->vlan_flag);
 
 	tx_id = (uint16_t)(tx_id + nb_commit);
 	if (tx_id > txq->next_rs) {
@@ -2031,6 +2206,73 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_pkts;
 }
 
+static __rte_always_inline uint16_t
+iavf_xmit_fixed_burst_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
+				 uint16_t nb_pkts, bool offload)
+{
+	struct iavf_tx_queue *txq = (struct iavf_tx_queue *)tx_queue;
+	volatile struct iavf_tx_desc *txdp;
+	struct iavf_tx_vec_entry *txep;
+	uint16_t n, nb_commit, nb_mbuf, tx_id;
+	/* bit2 is reserved and must be set to 1 according to Spec */
+	uint64_t flags = IAVF_TX_DESC_CMD_EOP | IAVF_TX_DESC_CMD_ICRC;
+	uint64_t rs = IAVF_TX_DESC_CMD_RS | flags;
+
+	if (txq->nb_free < txq->free_thresh)
+		iavf_tx_free_bufs_avx512(txq);
+
+	nb_commit = (uint16_t)RTE_MIN(txq->nb_free, nb_pkts << 1);
+	nb_commit &= 0xFFFE;
+	if (unlikely(nb_commit == 0))
+		return 0;
+
+	nb_pkts = nb_commit >> 1;
+	tx_id = txq->tx_tail;
+	txdp = &txq->tx_ring[tx_id];
+	txep = (void *)txq->sw_ring;
+	txep += (tx_id >> 1);
+
+	txq->nb_free = (uint16_t)(txq->nb_free - nb_commit);
+	n = (uint16_t)(txq->nb_tx_desc - tx_id);
+
+	if (n != 0 && nb_commit >= n) {
+		nb_mbuf = n >> 1;
+		tx_backlog_entry_avx512(txep, tx_pkts, nb_mbuf);
+
+		ctx_vtx(txdp, tx_pkts, nb_mbuf - 1, flags, offload, txq->vlan_flag);
+		tx_pkts += (nb_mbuf - 1);
+		txdp += (n - 2);
+		ctx_vtx1(txdp, *tx_pkts++, rs, offload, txq->vlan_flag);
+
+		nb_commit = (uint16_t)(nb_commit - n);
+
+		txq->next_rs = (uint16_t)(txq->rs_thresh - 1);
+		tx_id = 0;
+		/* avoid reach the end of ring */
+		txdp = txq->tx_ring;
+		txep = (void *)txq->sw_ring;
+	}
+
+	nb_mbuf = nb_commit >> 1;
+	tx_backlog_entry_avx512(txep, tx_pkts, nb_mbuf);
+
+	ctx_vtx(txdp, tx_pkts, nb_mbuf, flags, offload, txq->vlan_flag);
+	tx_id = (uint16_t)(tx_id + nb_commit);
+
+	if (tx_id > txq->next_rs) {
+		txq->tx_ring[txq->next_rs].cmd_type_offset_bsz |=
+			rte_cpu_to_le_64(((uint64_t)IAVF_TX_DESC_CMD_RS) <<
+					 IAVF_TXD_QW1_CMD_SHIFT);
+		txq->next_rs =
+			(uint16_t)(txq->next_rs + txq->rs_thresh);
+	}
+
+	txq->tx_tail = tx_id;
+
+	IAVF_PCI_REG_WC_WRITE(txq->qtx_tail, txq->tx_tail);
+	return nb_pkts;
+}
+
 static __rte_always_inline uint16_t
 iavf_xmit_pkts_vec_avx512_cmn(void *tx_queue, struct rte_mbuf **tx_pkts,
 			      uint16_t nb_pkts, bool offload)
@@ -2071,9 +2313,11 @@ iavf_tx_queue_release_mbufs_avx512(struct iavf_tx_queue *txq)
 	if (!txq->sw_ring || txq->nb_free == max_desc)
 		return;
 
-	i = txq->next_dd - txq->rs_thresh + 1;
+	i = (txq->next_dd >> txq->use_ctx) + 1 -
+			(txq->rs_thresh >> txq->use_ctx);
+
 	if (txq->tx_tail < i) {
-		for (; i < txq->nb_tx_desc; i++) {
+		for (; i < (unsigned int)(txq->nb_tx_desc >> txq->use_ctx); i++) {
 			rte_pktmbuf_free_seg(swr[i].mbuf);
 			swr[i].mbuf = NULL;
 		}
@@ -2094,3 +2338,41 @@ iavf_xmit_pkts_vec_avx512_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
 {
 	return iavf_xmit_pkts_vec_avx512_cmn(tx_queue, tx_pkts, nb_pkts, true);
 }
+
+static __rte_always_inline uint16_t
+iavf_xmit_pkts_vec_avx512_ctx_cmn(void *tx_queue, struct rte_mbuf **tx_pkts,
+				  uint16_t nb_pkts, bool offload)
+{
+	uint16_t nb_tx = 0;
+	struct iavf_tx_queue *txq = (struct iavf_tx_queue *)tx_queue;
+
+	while (nb_pkts) {
+		uint16_t ret, num;
+
+		/* cross rs_thresh boundary is not allowed */
+		num = (uint16_t)RTE_MIN(nb_pkts << 1, txq->rs_thresh);
+		num = num >> 1;
+		ret = iavf_xmit_fixed_burst_vec_avx512_ctx(tx_queue, &tx_pkts[nb_tx],
+						       num, offload);
+		nb_tx += ret;
+		nb_pkts -= ret;
+		if (ret < num)
+			break;
+	}
+
+	return nb_tx;
+}
+
+uint16_t
+iavf_xmit_pkts_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
+				  uint16_t nb_pkts)
+{
+	return iavf_xmit_pkts_vec_avx512_ctx_cmn(tx_queue, tx_pkts, nb_pkts, false);
+}
+
+uint16_t
+iavf_xmit_pkts_vec_avx512_ctx_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
+				  uint16_t nb_pkts)
+{
+	return iavf_xmit_pkts_vec_avx512_ctx_cmn(tx_queue, tx_pkts, nb_pkts, true);
+}
diff --git a/drivers/net/iavf/iavf_rxtx_vec_common.h b/drivers/net/iavf/iavf_rxtx_vec_common.h
index a59cb2ceee..9568ce2dd0 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/iavf/iavf_rxtx_vec_common.h
@@ -253,10 +253,28 @@ iavf_tx_vec_queue_default(struct iavf_tx_queue *txq)
 	if (txq->offloads & IAVF_TX_NO_VECTOR_FLAGS)
 		return -1;
 
-	if (txq->offloads & IAVF_TX_VECTOR_OFFLOAD)
-		return IAVF_VECTOR_OFFLOAD_PATH;
-
-	return IAVF_VECTOR_PATH;
+	/**
+	 * Vlan tci needs to be inserted via ctx desc, if the vlan_flag is L2TAG2.
+	 * Tunneling parameters and other fields need be configured in ctx desc
+	 * if the outer checksum offload is enabled.
+	 */
+	if (txq->vlan_flag == IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
+		txq->use_ctx = 1;
+		if (txq->offloads & (IAVF_TX_VECTOR_OFFLOAD |
+				IAVF_TX_VECTOR_OFFLOAD_CTX))
+			return IAVF_VECTOR_CTX_OFFLOAD_PATH;
+		else
+			return IAVF_VECTOR_CTX_PATH;
+	} else {
+		if (txq->offloads & IAVF_TX_VECTOR_OFFLOAD_CTX) {
+			txq->use_ctx = 1;
+			return IAVF_VECTOR_CTX_OFFLOAD_PATH;
+		} else if (txq->offloads & IAVF_TX_VECTOR_OFFLOAD) {
+			return IAVF_VECTOR_OFFLOAD_PATH;
+		} else {
+			return IAVF_VECTOR_PATH;
+		}
+	}
 }
 
 static inline int
@@ -313,7 +331,7 @@ iavf_tx_vec_dev_check_default(struct rte_eth_dev *dev)
 
 static __rte_always_inline void
 iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
-			uint64_t *txd_hi)
+			uint64_t *txd_hi, uint8_t vlan_flag)
 {
 #if defined(IAVF_TX_CSUM_OFFLOAD) || defined(IAVF_TX_VLAN_QINQ_OFFLOAD)
 	uint64_t ol_flags = tx_pkt->ol_flags;
@@ -325,14 +343,20 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 
 #ifdef IAVF_TX_CSUM_OFFLOAD
 	/* Set MACLEN */
-	td_offset |= (tx_pkt->l2_len >> 1) <<
-		     IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
+	if (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)
+		td_offset |= (tx_pkt->outer_l2_len >> 1)
+			<< IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
+	else
+		td_offset |= (tx_pkt->l2_len >> 1)
+			<< IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
 
 	/* Enable L3 checksum offloads */
 	if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
-		td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
-		td_offset |= (tx_pkt->l3_len >> 2) <<
-			     IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+		if (ol_flags & RTE_MBUF_F_TX_IPV4) {
+			td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
+			td_offset |= (tx_pkt->l3_len >> 2) <<
+				     IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+		}
 	} else if (ol_flags & RTE_MBUF_F_TX_IPV4) {
 		td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4;
 		td_offset |= (tx_pkt->l3_len >> 2) <<
@@ -368,7 +392,8 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 #endif
 
 #ifdef IAVF_TX_VLAN_QINQ_OFFLOAD
-	if (ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
+	if ((ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) &&
+		(vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1)) {
 		td_cmd |= IAVF_TX_DESC_CMD_IL2TAG1;
 		*txd_hi |= ((uint64_t)tx_pkt->vlan_tci <<
 			    IAVF_TXD_QW1_L2TAG1_SHIFT);
-- 
2.25.1
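
As a usage note (not part of the patch): the new context-descriptor Tx path
is exercised when the application requests the outer checksum offloads at
configure time and describes its tunnelled mbufs accordingly. Below is a
minimal sketch assuming a plain VXLAN-in-IPv4 packet (no VLAN tags or IP
options) and the standard rte_ethdev/rte_mbuf APIs; the two helper names
are made up for the example.

#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_vxlan.h>

/* Request the Tx offloads that make iavf pick a ctx-desc capable path. */
static void
setup_outer_csum_offloads(struct rte_eth_conf *port_conf)
{
	port_conf->txmode.offloads |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM |
				      RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM |
				      RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
}

/* Describe a VXLAN-encapsulated IPv4 packet so the driver can fill the
 * tunneling fields (EIPT/EIPLEN/L4TUNT/L4TUNLEN) of the context descriptor.
 */
static void
mark_vxlan_mbuf(struct rte_mbuf *m)
{
	m->outer_l2_len = RTE_ETHER_HDR_LEN;		/* outer Ethernet */
	m->outer_l3_len = sizeof(struct rte_ipv4_hdr);	/* outer IPv4 */
	/* For MAC-in-UDP, l2_len covers UDP + VXLAN + inner Ethernet. */
	m->l2_len = sizeof(struct rte_udp_hdr) +
		    sizeof(struct rte_vxlan_hdr) + RTE_ETHER_HDR_LEN;
	m->l3_len = sizeof(struct rte_ipv4_hdr);	/* inner IPv4 */
	m->ol_flags |= RTE_MBUF_F_TX_TUNNEL_VXLAN |
		       RTE_MBUF_F_TX_OUTER_IPV4 |
		       RTE_MBUF_F_TX_OUTER_IP_CKSUM |
		       RTE_MBUF_F_TX_OUTER_UDP_CKSUM |
		       RTE_MBUF_F_TX_IPV4 |
		       RTE_MBUF_F_TX_IP_CKSUM;
}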



* RE: [PATCH v2] net/iavf: enable tx outer checksum offload on avx512
  2023-02-02  5:58 ` [PATCH v2] " Zhichao Zeng
@ 2023-02-02  6:24   ` Xu, Ke1
  2023-02-02  7:09     ` Zhang, Qi Z
  2023-02-17  1:49   ` [PATCH v3] net/iavf: enable Tx " Zhichao Zeng
  1 sibling, 1 reply; 8+ messages in thread
From: Xu, Ke1 @ 2023-02-02  6:24 UTC (permalink / raw)
  To: Zeng, ZhichaoX, dev
  Cc: Zhang, Qi Z, Zhou, YidingX, Wu, Jingjing, Xing, Beilei,
	Richardson, Bruce, Konstantin Ananyev

> -----Original Message-----
> From: Zeng, ZhichaoX <zhichaox.zeng@intel.com>
> Sent: Thursday, February 2, 2023 1:59 PM
> To: dev@dpdk.org
> Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; Zhou, YidingX
> <yidingx.zhou@intel.com>; Xu, Ke1 <ke1.xu@intel.com>; Zeng, ZhichaoX
> <zhichaox.zeng@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>; Xing,
> Beilei <beilei.xing@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Konstantin Ananyev
> <konstantin.v.ananyev@yandex.ru>
> Subject: [PATCH v2] net/iavf: enable tx outer checksum offload on avx512
> 
> This patch enables outer checksum offload on the AVX512 Tx path for tunnel
> packets by adding a Tx path that uses the context descriptor and by
> adjusting the path selection logic.
> 
> Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
> Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>

V2 patch functional tests passed.

Regards,
Tested-by: Ke Xu <ke1.xu@intel.com>

> 
> ---
> v2: fix path select logic
> ---
>  drivers/net/iavf/iavf_rxtx.c            |  21 +-
>  drivers/net/iavf/iavf_rxtx.h            |  22 +-
>  drivers/net/iavf/iavf_rxtx_vec_avx512.c | 324 ++++++++++++++++++++++--
> drivers/net/iavf/iavf_rxtx_vec_common.h |  47 +++-
>  4 files changed, 376 insertions(+), 38 deletions(-)
> 



* RE: [PATCH v2] net/iavf: enable tx outer checksum offload on avx512
  2023-02-02  6:24   ` Xu, Ke1
@ 2023-02-02  7:09     ` Zhang, Qi Z
  0 siblings, 0 replies; 8+ messages in thread
From: Zhang, Qi Z @ 2023-02-02  7:09 UTC (permalink / raw)
  To: Xu, Ke1, Zeng, ZhichaoX, dev
  Cc: Zhou, YidingX, Wu, Jingjing, Xing, Beilei, Richardson,  Bruce,
	Konstantin Ananyev



> -----Original Message-----
> From: Xu, Ke1 <ke1.xu@intel.com>
> Sent: Thursday, February 2, 2023 2:25 PM
> To: Zeng, ZhichaoX <zhichaox.zeng@intel.com>; dev@dpdk.org
> Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; Zhou, YidingX
> <yidingx.zhou@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>; Xing, Beilei
> <beilei.xing@intel.com>; Richardson, Bruce <bruce.richardson@intel.com>;
> Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> Subject: RE: [PATCH v2] net/iavf: enable tx outer checksum offload on avx512
> 
> > -----Original Message-----
> > From: Zeng, ZhichaoX <zhichaox.zeng@intel.com>
> > Sent: Thursday, February 2, 2023 1:59 PM
> > To: dev@dpdk.org
> > Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; Zhou, YidingX
> > <yidingx.zhou@intel.com>; Xu, Ke1 <ke1.xu@intel.com>; Zeng, ZhichaoX
> > <zhichaox.zeng@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>; Xing,
> > Beilei <beilei.xing@intel.com>; Richardson, Bruce
> > <bruce.richardson@intel.com>; Konstantin Ananyev
> > <konstantin.v.ananyev@yandex.ru>
> > Subject: [PATCH v2] net/iavf: enable tx outer checksum offload on
> > avx512
> >
> > This patch enables outer checksum offload on the AVX512 Tx path for
> > tunnel packets by adding a Tx path that uses the context descriptor
> > and by adjusting the path selection logic.
> >
> > Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
> > Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>
> 
> V2 patch functional tests passed.
> 
> Regards,
> Tested-by: Ke Xu <ke1.xu@intel.com>

Acked-by: Qi Zhang <qi.z.zhang@intel.com>

Applied to dpdk-next-net-intel.

Thanks
Qi



* [PATCH v3] net/iavf: enable Tx outer checksum offload on avx512
  2023-02-02  5:58 ` [PATCH v2] " Zhichao Zeng
  2023-02-02  6:24   ` Xu, Ke1
@ 2023-02-17  1:49   ` Zhichao Zeng
  2023-02-17  1:50     ` Xu, Ke1
  1 sibling, 1 reply; 8+ messages in thread
From: Zhichao Zeng @ 2023-02-17  1:49 UTC (permalink / raw)
  To: dev
  Cc: qiming.yang, yidingx.zhou, qi.z.zhang, ke1.xu, Zhichao Zeng,
	Jingjing Wu, Beilei Xing, Bruce Richardson, Konstantin Ananyev

This patch enables outer checksum offload on the AVX512 Tx path
for tunnel packets by adding a Tx path that uses the context
descriptor and by adjusting the path selection logic.

Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>

---
v3: fix tunneling parameters
---
v2: fix path select logic
---
 drivers/net/iavf/iavf_rxtx.c            |  14 +-
 drivers/net/iavf/iavf_rxtx.h            |  19 +-
 drivers/net/iavf/iavf_rxtx_vec_avx512.c | 392 ++++++++++++++++++++++--
 drivers/net/iavf/iavf_rxtx_vec_common.h |  44 ++-
 4 files changed, 432 insertions(+), 37 deletions(-)
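
One detail worth calling out before the diff: once use_ctx is set, every
packet consumes two hardware descriptors (one context descriptor plus one
data descriptor), while the software ring still keeps a single mbuf entry
per packet. That is why the patch shifts rs_thresh, next_dd and nb_tx_desc
right by use_ctx whenever a descriptor count has to be converted into an
mbuf/sw_ring count. A tiny illustrative helper (not driver code, made-up
name) showing the conversion:

/* Convert a hardware descriptor count into the matching packet count.
 * With use_ctx == 0 this is the identity; with use_ctx == 1 it halves
 * the count, e.g. rs_thresh = 32 descriptors correspond to 16 packets.
 */
static inline uint16_t
iavf_descs_to_pkts(uint16_t nb_desc, uint8_t use_ctx)
{
	return nb_desc >> use_ctx;
}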

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 3d9224b38d..a860350919 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -781,10 +781,13 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
 		else
 			insertion_cap = insertion_support->inner;
 
-		if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1)
+		if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) {
 			txq->vlan_flag = IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1;
-		else if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2)
+			PMD_INIT_LOG(DEBUG, "VLAN insertion_cap: L2TAG1");
+		} else if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2) {
 			txq->vlan_flag = IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2;
+			PMD_INIT_LOG(DEBUG, "VLAN insertion_cap: L2TAG2");
+		}
 	} else {
 		txq->vlan_flag = IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1;
 	}
@@ -3261,11 +3264,16 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
 				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512;
 				PMD_DRV_LOG(DEBUG, "Using AVX512 Vector Tx (port %d).",
 					    dev->data->port_id);
-			} else {
+			} else if (check_ret == IAVF_VECTOR_OFFLOAD_PATH) {
 				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512_offload;
 				dev->tx_pkt_prepare = iavf_prep_pkts;
 				PMD_DRV_LOG(DEBUG, "Using AVX512 OFFLOAD Vector Tx (port %d).",
 					    dev->data->port_id);
+			} else {
+				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512_ctx_offload;
+				dev->tx_pkt_prepare = iavf_prep_pkts;
+				PMD_DRV_LOG(DEBUG, "Using AVX512 CONTEXT OFFLOAD Vector Tx (port %d).",
+					    dev->data->port_id);
 			}
 		}
 #endif
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index a6ad88885b..09e2127db0 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -26,8 +26,6 @@
 #define IAVF_TX_NO_VECTOR_FLAGS (				 \
 		RTE_ETH_TX_OFFLOAD_MULTI_SEGS |		 \
 		RTE_ETH_TX_OFFLOAD_TCP_TSO |		 \
-		RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM |    \
-		RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM |	\
 		RTE_ETH_TX_OFFLOAD_SECURITY)
 
 #define IAVF_TX_VECTOR_OFFLOAD (				 \
@@ -38,14 +36,28 @@
 		RTE_ETH_TX_OFFLOAD_UDP_CKSUM |		 \
 		RTE_ETH_TX_OFFLOAD_TCP_CKSUM)
 
+#define IAVF_TX_VECTOR_OFFLOAD_CTX (			\
+		RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM |	\
+		RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM)
+
 #define IAVF_RX_VECTOR_OFFLOAD (				 \
 		RTE_ETH_RX_OFFLOAD_CHECKSUM |		 \
 		RTE_ETH_RX_OFFLOAD_SCTP_CKSUM |		 \
 		RTE_ETH_RX_OFFLOAD_VLAN |		 \
 		RTE_ETH_RX_OFFLOAD_RSS_HASH)
 
+/**
+ * According to the vlan capabilities returned by the driver and FW, the vlan tci
+ * needs to be inserted to the L2TAG1 or L2TAG2 fields.
+ * If L2TAG1, it should be inserted to the L2TAG1 field in data desc.
+ * If L2TAG2, it should be inserted to the L2TAG2 field in ctx desc.
+ * Besides, tunneling parameters and other fields need be configured in ctx desc
+ * if the outer checksum offload is enabled.
+ */
+
 #define IAVF_VECTOR_PATH 0
 #define IAVF_VECTOR_OFFLOAD_PATH 1
+#define IAVF_VECTOR_CTX_OFFLOAD_PATH 2
 
 #define DEFAULT_TX_RS_THRESH     32
 #define DEFAULT_TX_FREE_THRESH   32
@@ -281,6 +293,7 @@ struct iavf_tx_queue {
 #define IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2	BIT(1)
 	uint8_t vlan_flag;
 	uint8_t tc;
+	uint8_t use_ctx:1;            /* if use the ctx desc, a packet needs two descriptors */
 };
 
 /* Offload features */
@@ -713,6 +726,8 @@ uint16_t iavf_xmit_pkts_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t iavf_xmit_pkts_vec_avx512_offload(void *tx_queue,
 					   struct rte_mbuf **tx_pkts,
 					   uint16_t nb_pkts);
+uint16_t iavf_xmit_pkts_vec_avx512_ctx_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
+				  uint16_t nb_pkts);
 int iavf_txq_vec_setup_avx512(struct iavf_tx_queue *txq);
 
 uint8_t iavf_proto_xtr_type_to_rxdid(uint8_t xtr_type);
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
index b416a716cf..ada2d85ff0 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
@@ -1782,13 +1782,13 @@ iavf_tx_free_bufs_avx512(struct iavf_tx_queue *txq)
 	    rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE))
 		return 0;
 
-	n = txq->rs_thresh;
+	n = txq->rs_thresh >> txq->use_ctx;
 
 	 /* first buffer to free from S/W ring is at index
 	  * tx_next_dd - (tx_rs_thresh-1)
 	  */
 	txep = (void *)txq->sw_ring;
-	txep += txq->next_dd - (n - 1);
+	txep += (txq->next_dd >> txq->use_ctx) - (n - 1);
 
 	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE && (n & 31) == 0) {
 		struct rte_mempool *mp = txep[0].mbuf->pool;
@@ -1887,14 +1887,15 @@ tx_backlog_entry_avx512(struct iavf_tx_vec_entry *txep,
 
 static __rte_always_inline void
 iavf_vtx1(volatile struct iavf_tx_desc *txdp,
-	  struct rte_mbuf *pkt, uint64_t flags, bool offload)
+	  struct rte_mbuf *pkt, uint64_t flags,
+	  bool offload, uint8_t vlan_flag)
 {
 	uint64_t high_qw =
 		(IAVF_TX_DESC_DTYPE_DATA |
 		 ((uint64_t)flags  << IAVF_TXD_QW1_CMD_SHIFT) |
 		 ((uint64_t)pkt->data_len << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT));
 	if (offload)
-		iavf_txd_enable_offload(pkt, &high_qw);
+		iavf_txd_enable_offload(pkt, &high_qw, vlan_flag);
 
 	__m128i descriptor = _mm_set_epi64x(high_qw,
 					    pkt->buf_iova + pkt->data_off);
@@ -1905,15 +1906,15 @@ iavf_vtx1(volatile struct iavf_tx_desc *txdp,
 #define IAVF_TX_OFF_MASK 0x55
 static __rte_always_inline void
 iavf_vtx(volatile struct iavf_tx_desc *txdp,
-	 struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags,
-	 bool offload)
+		struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags,
+		bool offload, uint8_t vlan_flag)
 {
 	const uint64_t hi_qw_tmpl = (IAVF_TX_DESC_DTYPE_DATA |
 			((uint64_t)flags  << IAVF_TXD_QW1_CMD_SHIFT));
 
 	/* if unaligned on 32-bit boundary, do one to align */
 	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
-		iavf_vtx1(txdp, *pkt, flags, offload);
+		iavf_vtx1(txdp, *pkt, flags, offload, vlan_flag);
 		nb_pkts--, txdp++, pkt++;
 	}
 
@@ -1923,26 +1924,24 @@ iavf_vtx(volatile struct iavf_tx_desc *txdp,
 			hi_qw_tmpl |
 			((uint64_t)pkt[3]->data_len <<
 			 IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
-		if (offload)
-			iavf_txd_enable_offload(pkt[3], &hi_qw3);
 		uint64_t hi_qw2 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[2]->data_len <<
 			 IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
-		if (offload)
-			iavf_txd_enable_offload(pkt[2], &hi_qw2);
 		uint64_t hi_qw1 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[1]->data_len <<
 			 IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
-		if (offload)
-			iavf_txd_enable_offload(pkt[1], &hi_qw1);
 		uint64_t hi_qw0 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[0]->data_len <<
 			 IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
-		if (offload)
-			iavf_txd_enable_offload(pkt[0], &hi_qw0);
+		if (offload) {
+			iavf_txd_enable_offload(pkt[3], &hi_qw3, vlan_flag);
+			iavf_txd_enable_offload(pkt[2], &hi_qw2, vlan_flag);
+			iavf_txd_enable_offload(pkt[1], &hi_qw1, vlan_flag);
+			iavf_txd_enable_offload(pkt[0], &hi_qw0, vlan_flag);
+		}
 
 		__m512i desc0_3 =
 			_mm512_set_epi64
@@ -1959,11 +1958,262 @@ iavf_vtx(volatile struct iavf_tx_desc *txdp,
 
 	/* do any last ones */
 	while (nb_pkts) {
-		iavf_vtx1(txdp, *pkt, flags, offload);
+		iavf_vtx1(txdp, *pkt, flags, offload, vlan_flag);
 		txdp++, pkt++, nb_pkts--;
 	}
 }
 
+static __rte_always_inline void
+iavf_fill_ctx_desc_tunneling_avx512(uint64_t *low_ctx_qw, struct rte_mbuf *pkt)
+{
+	if (pkt->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+		uint64_t eip_typ = IAVF_TX_CTX_DESC_EIPT_NONE;
+		uint64_t eip_len = 0;
+		uint64_t eip_noinc = 0;
+		/* Default - IP_ID is incremented in each segment of LSO */
+
+		switch (pkt->ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 |
+				RTE_MBUF_F_TX_OUTER_IPV6 |
+				RTE_MBUF_F_TX_OUTER_IP_CKSUM)) {
+		case RTE_MBUF_F_TX_OUTER_IPV4:
+			eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV4_NO_CHECKSUM_OFFLOAD;
+			eip_len = pkt->outer_l3_len >> 2;
+		break;
+		case RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IP_CKSUM:
+			eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV4_CHECKSUM_OFFLOAD;
+			eip_len = pkt->outer_l3_len >> 2;
+		break;
+		case RTE_MBUF_F_TX_OUTER_IPV6:
+			eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV6;
+			eip_len = pkt->outer_l3_len >> 2;
+		break;
+		}
+
+		/* L4TUNT: L4 Tunneling Type */
+		switch (pkt->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+		case RTE_MBUF_F_TX_TUNNEL_IPIP:
+			/* for non UDP / GRE tunneling, set to 00b */
+			break;
+		case RTE_MBUF_F_TX_TUNNEL_VXLAN:
+		case RTE_MBUF_F_TX_TUNNEL_VXLAN_GPE:
+		case RTE_MBUF_F_TX_TUNNEL_GTP:
+		case RTE_MBUF_F_TX_TUNNEL_GENEVE:
+			eip_typ |= IAVF_TXD_CTX_UDP_TUNNELING;
+			break;
+		case RTE_MBUF_F_TX_TUNNEL_GRE:
+			eip_typ |= IAVF_TXD_CTX_GRE_TUNNELING;
+			break;
+		default:
+			PMD_TX_LOG(ERR, "Tunnel type not supported");
+			return;
+		}
+
+		/* L4TUNLEN: L4 Tunneling Length, in Words
+		 *
+		 * We depend on app to set rte_mbuf.l2_len correctly.
+		 * For IP in GRE it should be set to the length of the GRE
+		 * header;
+		 * For MAC in GRE or MAC in UDP it should be set to the length
+		 * of the GRE or UDP headers plus the inner MAC up to and
+		 * including its last Ethertype.
+		 * If MPLS labels exist, it should include them as well.
+		 */
+		eip_typ |= (pkt->l2_len >> 1) << IAVF_TXD_CTX_QW0_NATLEN_SHIFT;
+
+		/**
+		 * Calculate the tunneling UDP checksum.
+		 * Shall be set only if L4TUNT = 01b and EIPT is not zero
+		 */
+		if ((eip_typ & (IAVF_TX_CTX_EXT_IP_IPV4 |
+					IAVF_TX_CTX_EXT_IP_IPV6 |
+					IAVF_TX_CTX_EXT_IP_IPV4_NO_CSUM)) &&
+				(eip_typ & IAVF_TXD_CTX_UDP_TUNNELING) &&
+				(pkt->ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM))
+			eip_typ |= IAVF_TXD_CTX_QW0_L4T_CS_MASK;
+
+		*low_ctx_qw = eip_typ << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPT_SHIFT |
+			eip_len << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPLEN_SHIFT |
+			eip_noinc << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIP_NOINC_SHIFT;
+
+	} else {
+		*low_ctx_qw = 0;
+	}
+}
+
+static inline void
+iavf_fill_ctx_desc_tunnelling_field(volatile uint64_t *qw0,
+		const struct rte_mbuf *m)
+{
+	uint64_t eip_typ = IAVF_TX_CTX_DESC_EIPT_NONE;
+	uint64_t eip_len = 0;
+	uint64_t eip_noinc = 0;
+	/* Default - IP_ID is incremented in each segment of LSO */
+
+	switch (m->ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 |
+			RTE_MBUF_F_TX_OUTER_IPV6 |
+			RTE_MBUF_F_TX_OUTER_IP_CKSUM)) {
+	case RTE_MBUF_F_TX_OUTER_IPV4:
+		eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV4_NO_CHECKSUM_OFFLOAD;
+		eip_len = m->outer_l3_len >> 2;
+	break;
+	case RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IP_CKSUM:
+		eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV4_CHECKSUM_OFFLOAD;
+		eip_len = m->outer_l3_len >> 2;
+	break;
+	case RTE_MBUF_F_TX_OUTER_IPV6:
+		eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV6;
+		eip_len = m->outer_l3_len >> 2;
+	break;
+	}
+
+	/* L4TUNT: L4 Tunneling Type */
+	switch (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+	case RTE_MBUF_F_TX_TUNNEL_IPIP:
+		/* for non UDP / GRE tunneling, set to 00b */
+		break;
+	case RTE_MBUF_F_TX_TUNNEL_VXLAN:
+	case RTE_MBUF_F_TX_TUNNEL_VXLAN_GPE:
+	case RTE_MBUF_F_TX_TUNNEL_GTP:
+	case RTE_MBUF_F_TX_TUNNEL_GENEVE:
+		eip_typ |= IAVF_TXD_CTX_UDP_TUNNELING;
+		break;
+	case RTE_MBUF_F_TX_TUNNEL_GRE:
+		eip_typ |= IAVF_TXD_CTX_GRE_TUNNELING;
+		break;
+	default:
+		PMD_TX_LOG(ERR, "Tunnel type not supported");
+		return;
+	}
+
+	/* L4TUNLEN: L4 Tunneling Length, in Words
+	 *
+	 * We depend on app to set rte_mbuf.l2_len correctly.
+	 * For IP in GRE it should be set to the length of the GRE
+	 * header;
+	 * For MAC in GRE or MAC in UDP it should be set to the length
+	 * of the GRE or UDP headers plus the inner MAC up to and
+	 * including its last Ethertype.
+	 * If MPLS labels exist, it should include them as well.
+	 */
+	eip_typ |= (m->l2_len >> 1) << IAVF_TXD_CTX_QW0_NATLEN_SHIFT;
+
+	/**
+	 * Calculate the tunneling UDP checksum.
+	 * Shall be set only if L4TUNT = 01b and EIPT is not zero
+	 */
+	if ((eip_typ & (IAVF_TX_CTX_EXT_IP_IPV6 |
+				IAVF_TX_CTX_EXT_IP_IPV4 |
+				IAVF_TX_CTX_EXT_IP_IPV4_NO_CSUM)) &&
+			(eip_typ & IAVF_TXD_CTX_UDP_TUNNELING) &&
+			(m->ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM))
+		eip_typ |= IAVF_TXD_CTX_QW0_L4T_CS_MASK;
+
+	*qw0 = eip_typ << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPT_SHIFT |
+		eip_len << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPLEN_SHIFT |
+		eip_noinc << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIP_NOINC_SHIFT;
+}
+
+static __rte_always_inline void
+ctx_vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt,
+		uint64_t flags, bool offload, uint8_t vlan_flag)
+{
+	uint64_t high_ctx_qw = IAVF_TX_DESC_DTYPE_CONTEXT;
+	uint64_t low_ctx_qw = 0;
+
+	if (((pkt->ol_flags & RTE_MBUF_F_TX_VLAN) || offload)) {
+		if (offload)
+			iavf_fill_ctx_desc_tunneling_avx512(&low_ctx_qw, pkt);
+		if ((pkt->ol_flags & RTE_MBUF_F_TX_VLAN) ||
+				(vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2)) {
+			high_ctx_qw |= IAVF_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+			low_ctx_qw |= (uint64_t)pkt->vlan_tci << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
+		}
+	}
+	uint64_t high_data_qw = (IAVF_TX_DESC_DTYPE_DATA |
+				((uint64_t)flags  << IAVF_TXD_QW1_CMD_SHIFT) |
+				((uint64_t)pkt->data_len << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT));
+	if (offload)
+		iavf_txd_enable_offload(pkt, &high_data_qw, vlan_flag);
+
+	__m256i ctx_data_desc = _mm256_set_epi64x(high_data_qw, pkt->buf_iova + pkt->data_off,
+							high_ctx_qw, low_ctx_qw);
+
+	_mm256_storeu_si256((__m256i *)txdp, ctx_data_desc);
+}
+
+static __rte_always_inline void
+ctx_vtx(volatile struct iavf_tx_desc *txdp,
+		struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags,
+		bool offload, uint8_t vlan_flag)
+{
+	uint64_t hi_data_qw_tmpl = (IAVF_TX_DESC_DTYPE_DATA |
+					((uint64_t)flags  << IAVF_TXD_QW1_CMD_SHIFT));
+
+	/* if unaligned on a 32-byte boundary, do one to align */
+	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
+		ctx_vtx1(txdp, *pkt, flags, offload, vlan_flag);
+		nb_pkts--, txdp++, pkt++;
+	}
+
+	for (; nb_pkts > 1; txdp += 4, pkt += 2, nb_pkts -= 2) {
+		uint64_t hi_ctx_qw1 = IAVF_TX_DESC_DTYPE_CONTEXT;
+		uint64_t hi_ctx_qw0 = IAVF_TX_DESC_DTYPE_CONTEXT;
+		uint64_t low_ctx_qw1 = 0;
+		uint64_t low_ctx_qw0 = 0;
+		uint64_t hi_data_qw1 = 0;
+		uint64_t hi_data_qw0 = 0;
+
+		hi_data_qw1 = hi_data_qw_tmpl |
+				((uint64_t)pkt[1]->data_len <<
+					IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
+		hi_data_qw0 = hi_data_qw_tmpl |
+				((uint64_t)pkt[0]->data_len <<
+					IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
+
+		if (pkt[1]->ol_flags & RTE_MBUF_F_TX_VLAN) {
+			if (vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
+				hi_ctx_qw1 |=
+					IAVF_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+				low_ctx_qw1 |=
+					(uint64_t)pkt[1]->vlan_tci << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
+			} else {
+				hi_data_qw1 |=
+					(uint64_t)pkt[1]->vlan_tci << IAVF_TXD_QW1_L2TAG1_SHIFT;
+			}
+		}
+
+		if (pkt[0]->ol_flags & RTE_MBUF_F_TX_VLAN) {
+			if (vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
+				hi_ctx_qw0 |=
+					IAVF_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+				low_ctx_qw0 |=
+					(uint64_t)pkt[0]->vlan_tci << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
+			} else {
+				hi_data_qw0 |=
+					(uint64_t)pkt[0]->vlan_tci << IAVF_TXD_QW1_L2TAG1_SHIFT;
+			}
+		}
+
+		if (offload) {
+			iavf_txd_enable_offload(pkt[1], &hi_data_qw1, vlan_flag);
+			iavf_txd_enable_offload(pkt[0], &hi_data_qw0, vlan_flag);
+			iavf_fill_ctx_desc_tunnelling_field(&low_ctx_qw1, pkt[1]);
+			iavf_fill_ctx_desc_tunnelling_field(&low_ctx_qw0, pkt[0]);
+		}
+
+		__m512i desc0_3 =
+				_mm512_set_epi64
+						(hi_data_qw1, pkt[1]->buf_iova + pkt[1]->data_off,
+						hi_ctx_qw1, low_ctx_qw1,
+						hi_data_qw0, pkt[0]->buf_iova + pkt[0]->data_off,
+						hi_ctx_qw0, low_ctx_qw0);
+		_mm512_storeu_si512((void *)txdp, desc0_3);
+	}
+
+	if (nb_pkts)
+		ctx_vtx1(txdp, *pkt, flags, offload, vlan_flag);
+}
+
 static __rte_always_inline uint16_t
 iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 				 uint16_t nb_pkts, bool offload)
@@ -1994,11 +2244,11 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 	if (nb_commit >= n) {
 		tx_backlog_entry_avx512(txep, tx_pkts, n);
 
-		iavf_vtx(txdp, tx_pkts, n - 1, flags, offload);
+		iavf_vtx(txdp, tx_pkts, n - 1, flags, offload, txq->vlan_flag);
 		tx_pkts += (n - 1);
 		txdp += (n - 1);
 
-		iavf_vtx1(txdp, *tx_pkts++, rs, offload);
+		iavf_vtx1(txdp, *tx_pkts++, rs, offload, txq->vlan_flag);
 
 		nb_commit = (uint16_t)(nb_commit - n);
 
@@ -2013,7 +2263,7 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	tx_backlog_entry_avx512(txep, tx_pkts, nb_commit);
 
-	iavf_vtx(txdp, tx_pkts, nb_commit, flags, offload);
+	iavf_vtx(txdp, tx_pkts, nb_commit, flags, offload, txq->vlan_flag);
 
 	tx_id = (uint16_t)(tx_id + nb_commit);
 	if (tx_id > txq->next_rs) {
@@ -2031,6 +2281,73 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_pkts;
 }
 
+static __rte_always_inline uint16_t
+iavf_xmit_fixed_burst_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
+				 uint16_t nb_pkts, bool offload)
+{
+	struct iavf_tx_queue *txq = (struct iavf_tx_queue *)tx_queue;
+	volatile struct iavf_tx_desc *txdp;
+	struct iavf_tx_vec_entry *txep;
+	uint16_t n, nb_commit, nb_mbuf, tx_id;
+	/* bit2 is reserved and must be set to 1 according to Spec */
+	uint64_t flags = IAVF_TX_DESC_CMD_EOP | IAVF_TX_DESC_CMD_ICRC;
+	uint64_t rs = IAVF_TX_DESC_CMD_RS | flags;
+
+	if (txq->nb_free < txq->free_thresh)
+		iavf_tx_free_bufs_avx512(txq);
+
+	nb_commit = (uint16_t)RTE_MIN(txq->nb_free, nb_pkts << 1);
+	nb_commit &= 0xFFFE;
+	if (unlikely(nb_commit == 0))
+		return 0;
+
+	nb_pkts = nb_commit >> 1;
+	tx_id = txq->tx_tail;
+	txdp = &txq->tx_ring[tx_id];
+	txep = (void *)txq->sw_ring;
+	txep += (tx_id >> 1);
+
+	txq->nb_free = (uint16_t)(txq->nb_free - nb_commit);
+	n = (uint16_t)(txq->nb_tx_desc - tx_id);
+
+	if (n != 0 && nb_commit >= n) {
+		nb_mbuf = n >> 1;
+		tx_backlog_entry_avx512(txep, tx_pkts, nb_mbuf);
+
+		ctx_vtx(txdp, tx_pkts, nb_mbuf - 1, flags, offload, txq->vlan_flag);
+		tx_pkts += (nb_mbuf - 1);
+		txdp += (n - 2);
+		ctx_vtx1(txdp, *tx_pkts++, rs, offload, txq->vlan_flag);
+
+		nb_commit = (uint16_t)(nb_commit - n);
+
+		txq->next_rs = (uint16_t)(txq->rs_thresh - 1);
+		tx_id = 0;
+		/* avoid reaching the end of the ring */
+		txdp = txq->tx_ring;
+		txep = (void *)txq->sw_ring;
+	}
+
+	nb_mbuf = nb_commit >> 1;
+	tx_backlog_entry_avx512(txep, tx_pkts, nb_mbuf);
+
+	ctx_vtx(txdp, tx_pkts, nb_mbuf, flags, offload, txq->vlan_flag);
+	tx_id = (uint16_t)(tx_id + nb_commit);
+
+	if (tx_id > txq->next_rs) {
+		txq->tx_ring[txq->next_rs].cmd_type_offset_bsz |=
+			rte_cpu_to_le_64(((uint64_t)IAVF_TX_DESC_CMD_RS) <<
+					 IAVF_TXD_QW1_CMD_SHIFT);
+		txq->next_rs =
+			(uint16_t)(txq->next_rs + txq->rs_thresh);
+	}
+
+	txq->tx_tail = tx_id;
+
+	IAVF_PCI_REG_WC_WRITE(txq->qtx_tail, txq->tx_tail);
+	return nb_pkts;
+}
+
 static __rte_always_inline uint16_t
 iavf_xmit_pkts_vec_avx512_cmn(void *tx_queue, struct rte_mbuf **tx_pkts,
 			      uint16_t nb_pkts, bool offload)
@@ -2071,9 +2388,11 @@ iavf_tx_queue_release_mbufs_avx512(struct iavf_tx_queue *txq)
 	if (!txq->sw_ring || txq->nb_free == max_desc)
 		return;
 
-	i = txq->next_dd - txq->rs_thresh + 1;
+	i = (txq->next_dd >> txq->use_ctx) + 1 -
+			(txq->rs_thresh >> txq->use_ctx);
+
 	if (txq->tx_tail < i) {
-		for (; i < txq->nb_tx_desc; i++) {
+		for (; i < (unsigned int)(txq->nb_tx_desc >> txq->use_ctx); i++) {
 			rte_pktmbuf_free_seg(swr[i].mbuf);
 			swr[i].mbuf = NULL;
 		}
@@ -2094,3 +2413,34 @@ iavf_xmit_pkts_vec_avx512_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
 {
 	return iavf_xmit_pkts_vec_avx512_cmn(tx_queue, tx_pkts, nb_pkts, true);
 }
+
+static __rte_always_inline uint16_t
+iavf_xmit_pkts_vec_avx512_ctx_cmn(void *tx_queue, struct rte_mbuf **tx_pkts,
+				  uint16_t nb_pkts, bool offload)
+{
+	uint16_t nb_tx = 0;
+	struct iavf_tx_queue *txq = (struct iavf_tx_queue *)tx_queue;
+
+	while (nb_pkts) {
+		uint16_t ret, num;
+
+		/* cross rs_thresh boundary is not allowed */
+		num = (uint16_t)RTE_MIN(nb_pkts << 1, txq->rs_thresh);
+		num = num >> 1;
+		ret = iavf_xmit_fixed_burst_vec_avx512_ctx(tx_queue, &tx_pkts[nb_tx],
+						       num, offload);
+		nb_tx += ret;
+		nb_pkts -= ret;
+		if (ret < num)
+			break;
+	}
+
+	return nb_tx;
+}
+
+uint16_t
+iavf_xmit_pkts_vec_avx512_ctx_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
+				  uint16_t nb_pkts)
+{
+	return iavf_xmit_pkts_vec_avx512_ctx_cmn(tx_queue, tx_pkts, nb_pkts, true);
+}
diff --git a/drivers/net/iavf/iavf_rxtx_vec_common.h b/drivers/net/iavf/iavf_rxtx_vec_common.h
index a59cb2ceee..9cc1a69dce 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/iavf/iavf_rxtx_vec_common.h
@@ -253,10 +253,25 @@ iavf_tx_vec_queue_default(struct iavf_tx_queue *txq)
 	if (txq->offloads & IAVF_TX_NO_VECTOR_FLAGS)
 		return -1;
 
-	if (txq->offloads & IAVF_TX_VECTOR_OFFLOAD)
-		return IAVF_VECTOR_OFFLOAD_PATH;
-
-	return IAVF_VECTOR_PATH;
+	/**
+	 * The VLAN tci needs to be inserted via the ctx desc if the vlan_flag
+	 * is L2TAG2. Tunneling parameters and other fields need to be
+	 * configured in the ctx desc if the outer checksum offload is enabled.
+	 */
+	if (txq->offloads & (IAVF_TX_VECTOR_OFFLOAD | IAVF_TX_VECTOR_OFFLOAD_CTX)) {
+		if (txq->offloads & IAVF_TX_VECTOR_OFFLOAD_CTX) {
+			if (txq->vlan_flag == IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
+				txq->use_ctx = 1;
+				return IAVF_VECTOR_CTX_OFFLOAD_PATH;
+			} else {
+				return -1;
+			}
+		} else {
+			return IAVF_VECTOR_OFFLOAD_PATH;
+		}
+	} else {
+		return IAVF_VECTOR_PATH;
+	}
 }
 
 static inline int
@@ -313,7 +328,7 @@ iavf_tx_vec_dev_check_default(struct rte_eth_dev *dev)
 
 static __rte_always_inline void
 iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
-			uint64_t *txd_hi)
+			uint64_t *txd_hi, uint8_t vlan_flag)
 {
 #if defined(IAVF_TX_CSUM_OFFLOAD) || defined(IAVF_TX_VLAN_QINQ_OFFLOAD)
 	uint64_t ol_flags = tx_pkt->ol_flags;
@@ -325,14 +340,20 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 
 #ifdef IAVF_TX_CSUM_OFFLOAD
 	/* Set MACLEN */
-	td_offset |= (tx_pkt->l2_len >> 1) <<
-		     IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
+	if (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)
+		td_offset |= (tx_pkt->outer_l2_len >> 1)
+			<< IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
+	else
+		td_offset |= (tx_pkt->l2_len >> 1)
+			<< IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
 
 	/* Enable L3 checksum offloads */
 	if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
-		td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
-		td_offset |= (tx_pkt->l3_len >> 2) <<
-			     IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+		if (ol_flags & RTE_MBUF_F_TX_IPV4) {
+			td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
+			td_offset |= (tx_pkt->l3_len >> 2) <<
+				     IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+		}
 	} else if (ol_flags & RTE_MBUF_F_TX_IPV4) {
 		td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4;
 		td_offset |= (tx_pkt->l3_len >> 2) <<
@@ -368,7 +389,8 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 #endif
 
 #ifdef IAVF_TX_VLAN_QINQ_OFFLOAD
-	if (ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
+	if ((ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) &&
+		(vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1)) {
 		td_cmd |= IAVF_TX_DESC_CMD_IL2TAG1;
 		*txd_hi |= ((uint64_t)tx_pkt->vlan_tci <<
 			    IAVF_TXD_QW1_L2TAG1_SHIFT);
-- 
2.25.1


^ permalink raw reply	[flat|nested] 8+ messages in thread
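
As a reference for how the new path is exercised: in the context-descriptor path each packet consumes two ring descriptors (one context, one data), which is why the free/commit accounting above is halved via use_ctx. Below is a minimal sketch of the application side, flagging a VXLAN-encapsulated TCP/IPv4 mbuf so the driver sees a tunnel packet with inner and outer checksum requests. The helper name and the header lengths are illustrative assumptions, not taken from the patch.

#include <rte_mbuf.h>

/* Illustrative helper (not part of the patch): mark a VXLAN-encapsulated
 * TCP/IPv4 mbuf for inner and outer checksum offload. Lengths assume
 * untagged Ethernet, IPv4 without options and a plain VXLAN header.
 */
static void
request_outer_csum_offload(struct rte_mbuf *m)
{
	m->outer_l2_len = 14;       /* outer Ethernet */
	m->outer_l3_len = 20;       /* outer IPv4 */
	m->l2_len = 8 + 8 + 14;     /* outer UDP + VXLAN + inner Ethernet */
	m->l3_len = 20;             /* inner IPv4 */
	m->l4_len = 20;             /* inner TCP */

	m->ol_flags |= RTE_MBUF_F_TX_TUNNEL_VXLAN |
		       RTE_MBUF_F_TX_OUTER_IPV4 |
		       RTE_MBUF_F_TX_OUTER_IP_CKSUM |
		       RTE_MBUF_F_TX_OUTER_UDP_CKSUM |
		       RTE_MBUF_F_TX_IPV4 |
		       RTE_MBUF_F_TX_IP_CKSUM |
		       RTE_MBUF_F_TX_TCP_CKSUM;
}

With these flags set on a queue whose Tx offloads include the outer checksum capabilities, the path-select logic above chooses the context-descriptor burst function and ctx_vtx()/ctx_vtx1() emit the paired descriptors.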

* RE: [PATCH v3] net/iavf: enable Tx outer checksum offload on avx512
  2023-02-17  1:49   ` [PATCH v3] net/iavf: enable Tx " Zhichao Zeng
@ 2023-02-17  1:50     ` Xu, Ke1
  2023-02-19  8:15       ` Zhang, Qi Z
  0 siblings, 1 reply; 8+ messages in thread
From: Xu, Ke1 @ 2023-02-17  1:50 UTC (permalink / raw)
  To: Zeng, ZhichaoX, dev
  Cc: Yang, Qiming, Zhou, YidingX, Zhang, Qi Z, Wu, Jingjing, Xing,
	Beilei, Richardson, Bruce, Konstantin Ananyev



> -----Original Message-----
> From: Zeng, ZhichaoX <zhichaox.zeng@intel.com>
> Sent: Friday, February 17, 2023 9:49 AM
> To: dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>; Zhou, YidingX
> <yidingx.zhou@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Xu, Ke1
> <ke1.xu@intel.com>; Zeng, ZhichaoX <zhichaox.zeng@intel.com>; Wu,
> Jingjing <jingjing.wu@intel.com>; Xing, Beilei <beilei.xing@intel.com>;
> Richardson, Bruce <bruce.richardson@intel.com>; Konstantin Ananyev
> <konstantin.v.ananyev@yandex.ru>
> Subject: [PATCH v3] net/iavf: enable Tx outer checksum offload on avx512
> 
> This patch is to enable outer checksum offload on avx512 Tx path for tunnel
> packet by adding Tx path with context descriptor and adjusting path select
> logic.
> 
> Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
> Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>

V3 patch validated and passed.

Tested-by: Ke Xu <ke1.xu@intel.com>

> 
> ---
> v3: fix tunneling parameters
> ---
> v2: fix path select logic
> ---
>  drivers/net/iavf/iavf_rxtx.c            |  14 +-
>  drivers/net/iavf/iavf_rxtx.h            |  19 +-
>  drivers/net/iavf/iavf_rxtx_vec_avx512.c | 392 ++++++++++++++++++++++--
> drivers/net/iavf/iavf_rxtx_vec_common.h |  44 ++-
>  4 files changed, 432 insertions(+), 37 deletions(-)
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread
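
For completeness, a hedged sketch of the port-level configuration that makes iavf_tx_vec_queue_default() select the context-descriptor path: the outer checksum Tx offloads have to be requested at configure time. The function name and the offload subset are illustrative assumptions; error handling and the rest of the port setup are omitted.

#include <rte_ethdev.h>

/* Illustrative only: request the outer-checksum Tx offloads, masked by
 * what the PMD actually reports, when configuring the port.
 */
static int
configure_port_with_outer_csum(uint16_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
{
	struct rte_eth_dev_info info;
	struct rte_eth_conf conf = {0};

	if (rte_eth_dev_info_get(port_id, &info) != 0)
		return -1;

	conf.txmode.offloads = info.tx_offload_capa &
		(RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM |
		 RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM |
		 RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
		 RTE_ETH_TX_OFFLOAD_TCP_CKSUM);

	return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
}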

* RE: [PATCH v3] net/iavf: enable Tx outer checksum offload on avx512
  2023-02-17  1:50     ` Xu, Ke1
@ 2023-02-19  8:15       ` Zhang, Qi Z
  2023-02-22  6:25         ` Zhang, Qi Z
  0 siblings, 1 reply; 8+ messages in thread
From: Zhang, Qi Z @ 2023-02-19  8:15 UTC (permalink / raw)
  To: Xu, Ke1, Zeng, ZhichaoX, dev
  Cc: Yang, Qiming, Zhou, YidingX, Wu, Jingjing, Xing, Beilei,
	Richardson, Bruce, Konstantin Ananyev



> -----Original Message-----
> From: Xu, Ke1 <ke1.xu@intel.com>
> Sent: Friday, February 17, 2023 9:51 AM
> To: Zeng, ZhichaoX <zhichaox.zeng@intel.com>; dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>; Zhou, YidingX
> <yidingx.zhou@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Wu, Jingjing
> <jingjing.wu@intel.com>; Xing, Beilei <beilei.xing@intel.com>; Richardson,
> Bruce <bruce.richardson@intel.com>; Konstantin Ananyev
> <konstantin.v.ananyev@yandex.ru>
> Subject: RE: [PATCH v3] net/iavf: enable Tx outer checksum offload on
> avx512
> 
> 
> 
> > -----Original Message-----
> > From: Zeng, ZhichaoX <zhichaox.zeng@intel.com>
> > Sent: Friday, February 17, 2023 9:49 AM
> > To: dev@dpdk.org
> > Cc: Yang, Qiming <qiming.yang@intel.com>; Zhou, YidingX
> > <yidingx.zhou@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Xu, Ke1
> > <ke1.xu@intel.com>; Zeng, ZhichaoX <zhichaox.zeng@intel.com>; Wu,
> > Jingjing <jingjing.wu@intel.com>; Xing, Beilei
> > <beilei.xing@intel.com>; Richardson, Bruce
> > <bruce.richardson@intel.com>; Konstantin Ananyev
> > <konstantin.v.ananyev@yandex.ru>
> > Subject: [PATCH v3] net/iavf: enable Tx outer checksum offload on
> > avx512
> >
> > This patch is to enable outer checksum offload on avx512 Tx path for
> > tunnel packet by adding Tx path with context descriptor and adjusting
> > path select logic.
> >
> > Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
> > Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>
> 
> V3 patch validated and passed.
> 
> Tested-by: Ke Xu <ke1.xu@intel.com>

replaced V2 in dpdk-next-net-intel.



^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH v3] net/iavf: enable Tx outer checksum offload on avx512
  2023-02-19  8:15       ` Zhang, Qi Z
@ 2023-02-22  6:25         ` Zhang, Qi Z
  0 siblings, 0 replies; 8+ messages in thread
From: Zhang, Qi Z @ 2023-02-22  6:25 UTC (permalink / raw)
  To: Zhang, Qi Z, Xu, Ke1, Zeng,  ZhichaoX, dev
  Cc: Yang, Qiming, Zhou, YidingX, Wu, Jingjing, Xing, Beilei,
	Richardson, Bruce, Konstantin Ananyev



> -----Original Message-----
> From: Zhang, Qi Z <qi.z.zhang@intel.com>
> Sent: Sunday, February 19, 2023 4:16 PM
> To: Xu, Ke1 <ke1.xu@intel.com>; Zeng, ZhichaoX <zhichaox.zeng@intel.com>;
> dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>; Zhou, YidingX
> <yidingx.zhou@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>; Xing, Beilei
> <beilei.xing@intel.com>; Richardson, Bruce <bruce.richardson@intel.com>;
> Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> Subject: RE: [PATCH v3] net/iavf: enable Tx outer checksum offload on
> avx512
> 
> 
> 
> > -----Original Message-----
> > From: Xu, Ke1 <ke1.xu@intel.com>
> > Sent: Friday, February 17, 2023 9:51 AM
> > To: Zeng, ZhichaoX <zhichaox.zeng@intel.com>; dev@dpdk.org
> > Cc: Yang, Qiming <qiming.yang@intel.com>; Zhou, YidingX
> > <yidingx.zhou@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Wu,
> > Jingjing <jingjing.wu@intel.com>; Xing, Beilei
> > <beilei.xing@intel.com>; Richardson, Bruce
> > <bruce.richardson@intel.com>; Konstantin Ananyev
> > <konstantin.v.ananyev@yandex.ru>
> > Subject: RE: [PATCH v3] net/iavf: enable Tx outer checksum offload on
> > avx512
> >
> >
> >
> > > -----Original Message-----
> > > From: Zeng, ZhichaoX <zhichaox.zeng@intel.com>
> > > Sent: Friday, February 17, 2023 9:49 AM
> > > To: dev@dpdk.org
> > > Cc: Yang, Qiming <qiming.yang@intel.com>; Zhou, YidingX
> > > <yidingx.zhou@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Xu,
> > > Ke1 <ke1.xu@intel.com>; Zeng, ZhichaoX <zhichaox.zeng@intel.com>;
> > > Wu, Jingjing <jingjing.wu@intel.com>; Xing, Beilei
> > > <beilei.xing@intel.com>; Richardson, Bruce
> > > <bruce.richardson@intel.com>; Konstantin Ananyev
> > > <konstantin.v.ananyev@yandex.ru>
> > > Subject: [PATCH v3] net/iavf: enable Tx outer checksum offload on
> > > avx512
> > >
> > > This patch is to enable outer checksum offload on avx512 Tx path for
> > > tunnel packet by adding Tx path with context descriptor and
> > > adjusting path select logic.
> > >
> > > Signed-off-by: Yiding Zhou <yidingx.zhou@intel.com>
> > > Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>
> >
> > V3 patch validated and passed.
> >
> > Tested-by: Ke Xu <ke1.xu@intel.com>
> 
> replaced V2 in dpdk-next-net-intel.
> 
As v2 has already been merged into main, expect a new patch with the incremental changes.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-02-22  6:25 UTC | newest]

Thread overview: 8+ messages
2022-12-22  7:32 [PATCH] net/iavf: enable tx outer checksum offload on avx512 Zhichao Zeng
2023-02-02  5:58 ` [PATCH v2] " Zhichao Zeng
2023-02-02  6:24   ` Xu, Ke1
2023-02-02  7:09     ` Zhang, Qi Z
2023-02-17  1:49   ` [PATCH v3] net/iavf: enable Tx " Zhichao Zeng
2023-02-17  1:50     ` Xu, Ke1
2023-02-19  8:15       ` Zhang, Qi Z
2023-02-22  6:25         ` Zhang, Qi Z
