patches for DPDK stable branches
 help / color / mirror / Atom feed
* [dpdk-stable] [PATCH v2 1/3] net/iavf: fix segment fault in AVX512
       [not found] <1617074128-50681-1-git-send-email-wenzhuo.lu@intel.com>
@ 2021-03-30  3:15 ` Wenzhuo Lu
  2021-03-30  3:15 ` [dpdk-stable] [PATCH v2 2/3] net/ice: " Wenzhuo Lu
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 14+ messages in thread
From: Wenzhuo Lu @ 2021-03-30  3:15 UTC (permalink / raw)
  To: dev; +Cc: Wenzhuo Lu, stable

Fix segment fault when failing to get the memory from the pool.

Fixes: 31737f2b66fb ("net/iavf: enable AVX512 for legacy Rx")
Cc: stable@dpdk.org

Reported-by: David Coyle <David.Coyle@intel.com>
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c   | 120 +------------------
 drivers/net/iavf/iavf_rxtx_vec_avx512.c |   5 +-
 drivers/net/iavf/iavf_rxtx_vec_common.h | 201 ++++++++++++++++++++++++++++++++
 3 files changed, 207 insertions(+), 119 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index cdb5139..2c2b139 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -10,126 +10,10 @@
 #pragma GCC diagnostic ignored "-Wcast-qual"
 #endif
 
-static inline void
+static __rte_always_inline void
 iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxp,
-				 IAVF_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i] = &rxq->fake_mbuf;
-				_mm_store_si128((__m128i *)&rxdp[i].read,
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IAVF_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxp[0];
-		mb1 = rxp[1];
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
-	}
-#else
-	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-	__m256i dma_addr0_1, dma_addr2_3;
-	__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH;
-			i += 4, rxp += 4, rxdp += 4) {
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m256i vaddr0_1, vaddr2_3;
-
-		mb0 = rxp[0];
-		mb1 = rxp[1];
-		mb2 = rxp[2];
-		mb3 = rxp[3];
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3
-		 */
-		vaddr0_1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-						vaddr1, 1);
-		vaddr2_3 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-						vaddr3, 1);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-		/* add headroom to pa values */
-		dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-		dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-		_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-	}
-
-#endif
-
-	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	return iavf_rxq_rearm_cmn(rxq, false);
 }
 
 #define PKTLEN_SHIFT     10
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
index 5cb4c7c..acd5e54 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
@@ -13,7 +13,7 @@
 #define IAVF_DESCS_PER_LOOP_AVX 8
 #define PKTLEN_SHIFT 10
 
-static inline void
+static __rte_always_inline void
 iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 {
 	int i;
@@ -25,6 +25,9 @@
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
+	if (!cache)
+		return iavf_rxq_rearm_cmn(rxq, true);
+
 	/* We need to pull 'n' more MBUFs into the software ring from mempool
 	 * We inline the mempool function here, so we can vectorize the copy
 	 * from the cache into the shadow ring.
diff --git a/drivers/net/iavf/iavf_rxtx_vec_common.h b/drivers/net/iavf/iavf_rxtx_vec_common.h
index 46a1873..c4cc544 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/iavf/iavf_rxtx_vec_common.h
@@ -11,6 +11,10 @@
 #include "iavf.h"
 #include "iavf_rxtx.h"
 
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
 static inline uint16_t
 reassemble_packets(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_bufs,
 		   uint16_t nb_bufs, uint8_t *split_flags)
@@ -276,4 +280,201 @@
 	return 0;
 }
 
+static __rte_always_inline void
+iavf_rxq_rearm_cmn(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
+{
+	int i;
+	uint16_t rx_id;
+	volatile union iavf_rx_desc *rxdp;
+	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void *)rxp,
+				 IAVF_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
+		    rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
+				rxp[i] = &rxq->fake_mbuf;
+				_mm_store_si128((__m128i *)&rxdp[i].read,
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			IAVF_RXQ_REARM_THRESH;
+		return;
+	}
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+	struct rte_mbuf *mb0, *mb1;
+	__m128i dma_addr0, dma_addr1;
+	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+			RTE_PKTMBUF_HEADROOM);
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
+		__m128i vaddr0, vaddr1;
+
+		mb0 = rxp[0];
+		mb1 = rxp[1];
+
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+		/* add headroom to pa values */
+		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+	}
+#else
+#ifdef CC_AVX512_SUPPORT
+	if (avx512) {
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+		__m512i dma_addr0_3, dma_addr4_7;
+		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
+				i += 8, rxp += 8, rxdp += 8) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+			__m256i vaddr0_1, vaddr2_3;
+			__m256i vaddr4_5, vaddr6_7;
+			__m512i vaddr0_3, vaddr4_7;
+
+			mb0 = rxp[0];
+			mb1 = rxp[1];
+			mb2 = rxp[2];
+			mb3 = rxp[3];
+			mb4 = rxp[4];
+			mb5 = rxp[5];
+			mb6 = rxp[6];
+			mb7 = rxp[7];
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3, and so on.
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+			vaddr4_5 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+							vaddr5, 1);
+			vaddr6_7 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+							vaddr7, 1);
+			vaddr0_3 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+							vaddr2_3, 1);
+			vaddr4_7 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+							vaddr6_7, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+			/* add headroom to pa values */
+			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+		}
+	} else
+#endif
+	{
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		__m256i dma_addr0_1, dma_addr2_3;
+		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
+				i += 4, rxp += 4, rxdp += 4) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m256i vaddr0_1, vaddr2_3;
+
+			mb0 = rxp[0];
+			mb1 = rxp[1];
+			mb2 = rxp[2];
+			mb3 = rxp[3];
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+			/* add headroom to pa values */
+			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+		}
+	}
+
+#endif
+
+	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+}
+
 #endif
-- 
1.9.3


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [dpdk-stable] [PATCH v2 2/3] net/ice: fix segment fault in AVX512
       [not found] <1617074128-50681-1-git-send-email-wenzhuo.lu@intel.com>
  2021-03-30  3:15 ` [dpdk-stable] [PATCH v2 1/3] net/iavf: fix segment fault in AVX512 Wenzhuo Lu
@ 2021-03-30  3:15 ` Wenzhuo Lu
  2021-03-30  3:15 ` [dpdk-stable] [PATCH v2 3/3] net/i40e: " Wenzhuo Lu
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 14+ messages in thread
From: Wenzhuo Lu @ 2021-03-30  3:15 UTC (permalink / raw)
  To: dev; +Cc: Wenzhuo Lu, stable

Fix segment fault when failing to get the memory from the pool.

Fixes: 7f85d5ebcfe1 ("net/ice: add AVX512 vector path")
Cc: stable@dpdk.org

Reported-by: David Coyle <David.Coyle@intel.com>
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
---
 drivers/net/ice/ice_rxtx_vec_avx2.c   | 120 +-------------------
 drivers/net/ice/ice_rxtx_vec_avx512.c |   5 +-
 drivers/net/ice/ice_rxtx_vec_common.h | 201 ++++++++++++++++++++++++++++++++++
 3 files changed, 207 insertions(+), 119 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
index 1cc5490..8d3cf3e 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -10,126 +10,10 @@
 #pragma GCC diagnostic ignored "-Wcast-qual"
 #endif
 
-static inline void
+static __rte_always_inline void
 ice_rxq_rearm(struct ice_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 ICE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128((__m128i *)&rxdp[i].read,
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
-	}
-#else
-	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-	__m256i dma_addr0_1, dma_addr2_3;
-	__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH;
-			i += 4, rxep += 4, rxdp += 4) {
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m256i vaddr0_1, vaddr2_3;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-		mb2 = rxep[2].mbuf;
-		mb3 = rxep[3].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3
-		 */
-		vaddr0_1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-						vaddr1, 1);
-		vaddr2_3 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-						vaddr3, 1);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-		/* add headroom to pa values */
-		dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-		dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-		_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-	}
-
-#endif
-
-	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	return ice_rxq_rearm_cmn(rxq, false);
 }
 
 static inline __m256i
diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c
index 0e5a676..f821698 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
@@ -12,7 +12,7 @@
 
 #define ICE_DESCS_PER_LOOP_AVX 8
 
-static inline void
+static __rte_always_inline void
 ice_rxq_rearm(struct ice_rx_queue *rxq)
 {
 	int i;
@@ -24,6 +24,9 @@
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
+	if (!cache)
+		return ice_rxq_rearm_cmn(rxq, true);
+
 	/* We need to pull 'n' more MBUFs into the software ring */
 	if (cache->len < ICE_RXQ_REARM_THRESH) {
 		uint32_t req = ICE_RXQ_REARM_THRESH + (cache->size -
diff --git a/drivers/net/ice/ice_rxtx_vec_common.h b/drivers/net/ice/ice_rxtx_vec_common.h
index c09ac7f..a35478b 100644
--- a/drivers/net/ice/ice_rxtx_vec_common.h
+++ b/drivers/net/ice/ice_rxtx_vec_common.h
@@ -7,6 +7,10 @@
 
 #include "ice_rxtx.h"
 
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
 static inline uint16_t
 ice_rx_reassemble_packets(struct ice_rx_queue *rxq, struct rte_mbuf **rx_bufs,
 			  uint16_t nb_bufs, uint8_t *split_flags)
@@ -318,4 +322,201 @@
 	return 0;
 }
 
+static __rte_always_inline void
+ice_rxq_rearm_cmn(struct ice_rx_queue *rxq, __rte_unused bool avx512)
+{
+	int i;
+	uint16_t rx_id;
+	volatile union ice_rx_flex_desc *rxdp;
+	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void *)rxep,
+				 ICE_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
+		    rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
+				rxep[i].mbuf = &rxq->fake_mbuf;
+				_mm_store_si128((__m128i *)&rxdp[i].read,
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			ICE_RXQ_REARM_THRESH;
+		return;
+	}
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+	struct rte_mbuf *mb0, *mb1;
+	__m128i dma_addr0, dma_addr1;
+	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+			RTE_PKTMBUF_HEADROOM);
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+		__m128i vaddr0, vaddr1;
+
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
+
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+		/* add headroom to pa values */
+		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+	}
+#else
+#ifdef CC_AVX512_SUPPORT
+	if (avx512) {
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+		__m512i dma_addr0_3, dma_addr4_7;
+		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+		for (i = 0; i < ICE_RXQ_REARM_THRESH;
+				i += 8, rxep += 8, rxdp += 8) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+			__m256i vaddr0_1, vaddr2_3;
+			__m256i vaddr4_5, vaddr6_7;
+			__m512i vaddr0_3, vaddr4_7;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+			mb4 = rxep[4].mbuf;
+			mb5 = rxep[5].mbuf;
+			mb6 = rxep[6].mbuf;
+			mb7 = rxep[7].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3, and so on.
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+			vaddr4_5 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+							vaddr5, 1);
+			vaddr6_7 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+							vaddr7, 1);
+			vaddr0_3 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+							vaddr2_3, 1);
+			vaddr4_7 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+							vaddr6_7, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+			/* add headroom to pa values */
+			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+		}
+	} else
+#endif
+	{
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		__m256i dma_addr0_1, dma_addr2_3;
+		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+		for (i = 0; i < ICE_RXQ_REARM_THRESH;
+				i += 4, rxep += 4, rxdp += 4) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m256i vaddr0_1, vaddr2_3;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+			/* add headroom to pa values */
+			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+		}
+	}
+
+#endif
+
+	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+}
+
 #endif
-- 
1.9.3


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [dpdk-stable] [PATCH v2 3/3] net/i40e: fix segment fault in AVX512
       [not found] <1617074128-50681-1-git-send-email-wenzhuo.lu@intel.com>
  2021-03-30  3:15 ` [dpdk-stable] [PATCH v2 1/3] net/iavf: fix segment fault in AVX512 Wenzhuo Lu
  2021-03-30  3:15 ` [dpdk-stable] [PATCH v2 2/3] net/ice: " Wenzhuo Lu
@ 2021-03-30  3:15 ` Wenzhuo Lu
       [not found] ` <1617082176-51391-1-git-send-email-wenzhuo.lu@intel.com>
       [not found] ` <1617937317-130223-1-git-send-email-wenzhuo.lu@intel.com>
  4 siblings, 0 replies; 14+ messages in thread
From: Wenzhuo Lu @ 2021-03-30  3:15 UTC (permalink / raw)
  To: dev; +Cc: Wenzhuo Lu, stable

Fix segment fault when failing to get the memory from the pool.

Fixes: e6a6a138919f ("net/i40e: add AVX512 vector path")
Cc: stable@dpdk.org

Reported-by: David Coyle <David.Coyle@intel.com>
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
---
 drivers/net/i40e/i40e_rxtx_vec_avx2.c   | 117 +------------------
 drivers/net/i40e/i40e_rxtx_vec_avx512.c |   5 +-
 drivers/net/i40e/i40e_rxtx_vec_common.h | 199 ++++++++++++++++++++++++++++++++
 3 files changed, 205 insertions(+), 116 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
index 15abd9d..133e2fb 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
@@ -18,123 +18,10 @@
 #pragma GCC diagnostic ignored "-Wcast-qual"
 #endif
 
-static inline void
+static __rte_always_inline void
 i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128((__m128i *)&rxdp[i].read,
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
-	}
-#else
-	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-	__m256i dma_addr0_1, dma_addr2_3;
-	__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
-			i += 4, rxep += 4, rxdp += 4) {
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m256i vaddr0_1, vaddr2_3;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-		mb2 = rxep[2].mbuf;
-		mb3 = rxep[3].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-		/*
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3
-		 */
-		vaddr0_1 = _mm256_inserti128_si256(
-				_mm256_castsi128_si256(vaddr0), vaddr1, 1);
-		vaddr2_3 = _mm256_inserti128_si256(
-				_mm256_castsi128_si256(vaddr2), vaddr3, 1);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-		/* add headroom to pa values */
-		dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-		dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-		_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-	}
-
-#endif
-
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	return i40e_rxq_rearm_cmn(rxq, false);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index 862c916..3382947 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -20,7 +20,7 @@
 
 #define RTE_I40E_DESCS_PER_LOOP_AVX 8
 
-static inline void
+static __rte_always_inline void
 i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 {
 	int i;
@@ -32,6 +32,9 @@
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
+	if (!cache)
+		return i40e_rxq_rearm_cmn(rxq, true);
+
 	/* We need to pull 'n' more MBUFs into the software ring from mempool
 	 * We inline the mempool function here, so we can vectorize the copy
 	 * from the cache into the shadow ring.
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index 33cebbe..457634f 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -11,6 +11,10 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
 static inline uint16_t
 reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs,
 		   uint16_t nb_bufs, uint8_t *split_flags)
@@ -252,4 +256,199 @@
 	return -1;
 #endif
 }
+
+static __rte_always_inline void
+i40e_rxq_rearm_cmn(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
+{
+	int i;
+	uint16_t rx_id;
+	volatile union i40e_rx_desc *rxdp;
+	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void *)rxep,
+				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+		    rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+				rxep[i].mbuf = &rxq->fake_mbuf;
+				_mm_store_si128((__m128i *)&rxdp[i].read,
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			RTE_I40E_RXQ_REARM_THRESH;
+		return;
+	}
+
+#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+	struct rte_mbuf *mb0, *mb1;
+	__m128i dma_addr0, dma_addr1;
+	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+			RTE_PKTMBUF_HEADROOM);
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+		__m128i vaddr0, vaddr1;
+
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
+
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+		/* add headroom to pa values */
+		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+	}
+#else
+#ifdef CC_AVX512_SUPPORT
+	if (avx512) {
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+		__m512i dma_addr0_3, dma_addr4_7;
+		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+				i += 8, rxep += 8, rxdp += 8) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+			__m256i vaddr0_1, vaddr2_3;
+			__m256i vaddr4_5, vaddr6_7;
+			__m512i vaddr0_3, vaddr4_7;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+			mb4 = rxep[4].mbuf;
+			mb5 = rxep[5].mbuf;
+			mb6 = rxep[6].mbuf;
+			mb7 = rxep[7].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3, and so on.
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+			vaddr4_5 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+							vaddr5, 1);
+			vaddr6_7 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+							vaddr7, 1);
+			vaddr0_3 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+							vaddr2_3, 1);
+			vaddr4_7 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+							vaddr6_7, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+			/* add headroom to pa values */
+			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+		}
+	} else
+#endif
+	{
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		__m256i dma_addr0_1, dma_addr2_3;
+		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+				i += 4, rxep += 4, rxdp += 4) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m256i vaddr0_1, vaddr2_3;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+			/*
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3
+			 */
+			vaddr0_1 = _mm256_inserti128_si256(
+					_mm256_castsi128_si256(vaddr0), vaddr1, 1);
+			vaddr2_3 = _mm256_inserti128_si256(
+					_mm256_castsi128_si256(vaddr2), vaddr3, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+			/* add headroom to pa values */
+			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+		}
+	}
+
+#endif
+
+	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+}
+
 #endif
-- 
1.9.3


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [dpdk-stable] [PATCH v3 1/3] net/iavf: fix segment fault in AVX512
       [not found] ` <1617082176-51391-1-git-send-email-wenzhuo.lu@intel.com>
@ 2021-03-30  5:29   ` Wenzhuo Lu
  2021-04-06 12:20     ` [dpdk-stable] [dpdk-dev] " Coyle, David
  2021-03-30  5:29   ` [dpdk-stable] [PATCH v3 2/3] net/ice: " Wenzhuo Lu
  2021-03-30  5:29   ` [dpdk-stable] [PATCH v3 3/3] net/i40e: " Wenzhuo Lu
  2 siblings, 1 reply; 14+ messages in thread
From: Wenzhuo Lu @ 2021-03-30  5:29 UTC (permalink / raw)
  To: dev; +Cc: Wenzhuo Lu, stable

Fix segment fault when failing to get the memory from the pool.

Fixes: 31737f2b66fb ("net/iavf: enable AVX512 for legacy Rx")
Cc: stable@dpdk.org

Reported-by: David Coyle <David.Coyle@intel.com>
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c   | 120 +------------------
 drivers/net/iavf/iavf_rxtx_vec_avx512.c |   5 +-
 drivers/net/iavf/iavf_rxtx_vec_common.h | 203 ++++++++++++++++++++++++++++++++
 3 files changed, 209 insertions(+), 119 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index cdb5139..2c2b139 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -10,126 +10,10 @@
 #pragma GCC diagnostic ignored "-Wcast-qual"
 #endif
 
-static inline void
+static __rte_always_inline void
 iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxp,
-				 IAVF_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i] = &rxq->fake_mbuf;
-				_mm_store_si128((__m128i *)&rxdp[i].read,
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IAVF_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxp[0];
-		mb1 = rxp[1];
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
-	}
-#else
-	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-	__m256i dma_addr0_1, dma_addr2_3;
-	__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH;
-			i += 4, rxp += 4, rxdp += 4) {
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m256i vaddr0_1, vaddr2_3;
-
-		mb0 = rxp[0];
-		mb1 = rxp[1];
-		mb2 = rxp[2];
-		mb3 = rxp[3];
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3
-		 */
-		vaddr0_1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-						vaddr1, 1);
-		vaddr2_3 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-						vaddr3, 1);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-		/* add headroom to pa values */
-		dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-		dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-		_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-	}
-
-#endif
-
-	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	return iavf_rxq_rearm_cmn(rxq, false);
 }
 
 #define PKTLEN_SHIFT     10
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
index 5cb4c7c..acd5e54 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
@@ -13,7 +13,7 @@
 #define IAVF_DESCS_PER_LOOP_AVX 8
 #define PKTLEN_SHIFT 10
 
-static inline void
+static __rte_always_inline void
 iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 {
 	int i;
@@ -25,6 +25,9 @@
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
+	if (!cache)
+		return iavf_rxq_rearm_cmn(rxq, true);
+
 	/* We need to pull 'n' more MBUFs into the software ring from mempool
 	 * We inline the mempool function here, so we can vectorize the copy
 	 * from the cache into the shadow ring.
diff --git a/drivers/net/iavf/iavf_rxtx_vec_common.h b/drivers/net/iavf/iavf_rxtx_vec_common.h
index 46a1873..57b4381 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/iavf/iavf_rxtx_vec_common.h
@@ -11,6 +11,10 @@
 #include "iavf.h"
 #include "iavf_rxtx.h"
 
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
 static inline uint16_t
 reassemble_packets(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_bufs,
 		   uint16_t nb_bufs, uint8_t *split_flags)
@@ -276,4 +280,203 @@
 	return 0;
 }
 
+#ifdef RTE_ARCH_X86
+static __rte_always_inline void
+iavf_rxq_rearm_cmn(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
+{
+	int i;
+	uint16_t rx_id;
+	volatile union iavf_rx_desc *rxdp;
+	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void *)rxp,
+				 IAVF_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
+		    rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
+				rxp[i] = &rxq->fake_mbuf;
+				_mm_store_si128((__m128i *)&rxdp[i].read,
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			IAVF_RXQ_REARM_THRESH;
+		return;
+	}
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+	struct rte_mbuf *mb0, *mb1;
+	__m128i dma_addr0, dma_addr1;
+	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+			RTE_PKTMBUF_HEADROOM);
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
+		__m128i vaddr0, vaddr1;
+
+		mb0 = rxp[0];
+		mb1 = rxp[1];
+
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+		/* add headroom to pa values */
+		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+	}
+#else
+#ifdef CC_AVX512_SUPPORT
+	if (avx512) {
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+		__m512i dma_addr0_3, dma_addr4_7;
+		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
+				i += 8, rxp += 8, rxdp += 8) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+			__m256i vaddr0_1, vaddr2_3;
+			__m256i vaddr4_5, vaddr6_7;
+			__m512i vaddr0_3, vaddr4_7;
+
+			mb0 = rxp[0];
+			mb1 = rxp[1];
+			mb2 = rxp[2];
+			mb3 = rxp[3];
+			mb4 = rxp[4];
+			mb5 = rxp[5];
+			mb6 = rxp[6];
+			mb7 = rxp[7];
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3, and so on.
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+			vaddr4_5 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+							vaddr5, 1);
+			vaddr6_7 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+							vaddr7, 1);
+			vaddr0_3 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+							vaddr2_3, 1);
+			vaddr4_7 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+							vaddr6_7, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+			/* add headroom to pa values */
+			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+		}
+	} else
+#endif
+	{
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		__m256i dma_addr0_1, dma_addr2_3;
+		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
+				i += 4, rxp += 4, rxdp += 4) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m256i vaddr0_1, vaddr2_3;
+
+			mb0 = rxp[0];
+			mb1 = rxp[1];
+			mb2 = rxp[2];
+			mb3 = rxp[3];
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+			/* add headroom to pa values */
+			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+		}
+	}
+
+#endif
+
+	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+}
+#endif
+
 #endif
-- 
1.9.3


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [dpdk-stable] [PATCH v3 2/3] net/ice: fix segment fault in AVX512
       [not found] ` <1617082176-51391-1-git-send-email-wenzhuo.lu@intel.com>
  2021-03-30  5:29   ` [dpdk-stable] [PATCH v3 1/3] net/iavf: " Wenzhuo Lu
@ 2021-03-30  5:29   ` Wenzhuo Lu
  2021-04-06 12:21     ` [dpdk-stable] [dpdk-dev] " Coyle, David
  2021-03-30  5:29   ` [dpdk-stable] [PATCH v3 3/3] net/i40e: " Wenzhuo Lu
  2 siblings, 1 reply; 14+ messages in thread
From: Wenzhuo Lu @ 2021-03-30  5:29 UTC (permalink / raw)
  To: dev; +Cc: Wenzhuo Lu, stable

Fix segment fault when failing to get the memory from the pool.

Fixes: 7f85d5ebcfe1 ("net/ice: add AVX512 vector path")
Cc: stable@dpdk.org

Reported-by: David Coyle <David.Coyle@intel.com>
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
---
 drivers/net/ice/ice_rxtx_vec_avx2.c   | 120 +-------------------
 drivers/net/ice/ice_rxtx_vec_avx512.c |   5 +-
 drivers/net/ice/ice_rxtx_vec_common.h | 203 ++++++++++++++++++++++++++++++++++
 3 files changed, 209 insertions(+), 119 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
index 1cc5490..8d3cf3e 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -10,126 +10,10 @@
 #pragma GCC diagnostic ignored "-Wcast-qual"
 #endif
 
-static inline void
+static __rte_always_inline void
 ice_rxq_rearm(struct ice_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 ICE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128((__m128i *)&rxdp[i].read,
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
-	}
-#else
-	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-	__m256i dma_addr0_1, dma_addr2_3;
-	__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH;
-			i += 4, rxep += 4, rxdp += 4) {
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m256i vaddr0_1, vaddr2_3;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-		mb2 = rxep[2].mbuf;
-		mb3 = rxep[3].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3
-		 */
-		vaddr0_1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-						vaddr1, 1);
-		vaddr2_3 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-						vaddr3, 1);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-		/* add headroom to pa values */
-		dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-		dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-		_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-	}
-
-#endif
-
-	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	return ice_rxq_rearm_cmn(rxq, false);
 }
 
 static inline __m256i
diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c
index 0e5a676..f821698 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
@@ -12,7 +12,7 @@
 
 #define ICE_DESCS_PER_LOOP_AVX 8
 
-static inline void
+static __rte_always_inline void
 ice_rxq_rearm(struct ice_rx_queue *rxq)
 {
 	int i;
@@ -24,6 +24,9 @@
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
+	if (!cache)
+		return ice_rxq_rearm_cmn(rxq, true);
+
 	/* We need to pull 'n' more MBUFs into the software ring */
 	if (cache->len < ICE_RXQ_REARM_THRESH) {
 		uint32_t req = ICE_RXQ_REARM_THRESH + (cache->size -
diff --git a/drivers/net/ice/ice_rxtx_vec_common.h b/drivers/net/ice/ice_rxtx_vec_common.h
index c09ac7f..24b6d3e 100644
--- a/drivers/net/ice/ice_rxtx_vec_common.h
+++ b/drivers/net/ice/ice_rxtx_vec_common.h
@@ -7,6 +7,10 @@
 
 #include "ice_rxtx.h"
 
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
 static inline uint16_t
 ice_rx_reassemble_packets(struct ice_rx_queue *rxq, struct rte_mbuf **rx_bufs,
 			  uint16_t nb_bufs, uint8_t *split_flags)
@@ -318,4 +322,203 @@
 	return 0;
 }
 
+#ifdef RTE_ARCH_X86
+static __rte_always_inline void
+ice_rxq_rearm_cmn(struct ice_rx_queue *rxq, __rte_unused bool avx512)
+{
+	int i;
+	uint16_t rx_id;
+	volatile union ice_rx_flex_desc *rxdp;
+	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void *)rxep,
+				 ICE_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
+		    rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
+				rxep[i].mbuf = &rxq->fake_mbuf;
+				_mm_store_si128((__m128i *)&rxdp[i].read,
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			ICE_RXQ_REARM_THRESH;
+		return;
+	}
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+	struct rte_mbuf *mb0, *mb1;
+	__m128i dma_addr0, dma_addr1;
+	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+			RTE_PKTMBUF_HEADROOM);
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+		__m128i vaddr0, vaddr1;
+
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
+
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+		/* add headroom to pa values */
+		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+	}
+#else
+#ifdef CC_AVX512_SUPPORT
+	if (avx512) {
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+		__m512i dma_addr0_3, dma_addr4_7;
+		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+		for (i = 0; i < ICE_RXQ_REARM_THRESH;
+				i += 8, rxep += 8, rxdp += 8) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+			__m256i vaddr0_1, vaddr2_3;
+			__m256i vaddr4_5, vaddr6_7;
+			__m512i vaddr0_3, vaddr4_7;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+			mb4 = rxep[4].mbuf;
+			mb5 = rxep[5].mbuf;
+			mb6 = rxep[6].mbuf;
+			mb7 = rxep[7].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3, and so on.
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+			vaddr4_5 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+							vaddr5, 1);
+			vaddr6_7 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+							vaddr7, 1);
+			vaddr0_3 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+							vaddr2_3, 1);
+			vaddr4_7 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+							vaddr6_7, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+			/* add headroom to pa values */
+			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+		}
+	} else
+#endif
+	{
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		__m256i dma_addr0_1, dma_addr2_3;
+		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+		for (i = 0; i < ICE_RXQ_REARM_THRESH;
+				i += 4, rxep += 4, rxdp += 4) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m256i vaddr0_1, vaddr2_3;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+			/* add headroom to pa values */
+			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+		}
+	}
+
+#endif
+
+	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+}
+#endif
+
 #endif
-- 
1.9.3


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [dpdk-stable] [PATCH v3 3/3] net/i40e: fix segment fault in AVX512
       [not found] ` <1617082176-51391-1-git-send-email-wenzhuo.lu@intel.com>
  2021-03-30  5:29   ` [dpdk-stable] [PATCH v3 1/3] net/iavf: " Wenzhuo Lu
  2021-03-30  5:29   ` [dpdk-stable] [PATCH v3 2/3] net/ice: " Wenzhuo Lu
@ 2021-03-30  5:29   ` Wenzhuo Lu
  2021-04-06 12:22     ` [dpdk-stable] [dpdk-dev] " Coyle, David
  2 siblings, 1 reply; 14+ messages in thread
From: Wenzhuo Lu @ 2021-03-30  5:29 UTC (permalink / raw)
  To: dev; +Cc: Wenzhuo Lu, stable

Fix segment fault when failing to get the memory from the pool.

Fixes: e6a6a138919f ("net/i40e: add AVX512 vector path")
Cc: stable@dpdk.org

Reported-by: David Coyle <David.Coyle@intel.com>
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
---
 drivers/net/i40e/i40e_rxtx_vec_avx2.c   | 117 +------------------
 drivers/net/i40e/i40e_rxtx_vec_avx512.c |   5 +-
 drivers/net/i40e/i40e_rxtx_vec_common.h | 201 ++++++++++++++++++++++++++++++++
 3 files changed, 207 insertions(+), 116 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
index 15abd9d..133e2fb 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
@@ -18,123 +18,10 @@
 #pragma GCC diagnostic ignored "-Wcast-qual"
 #endif
 
-static inline void
+static __rte_always_inline void
 i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128((__m128i *)&rxdp[i].read,
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
-	}
-#else
-	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-	__m256i dma_addr0_1, dma_addr2_3;
-	__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
-			i += 4, rxep += 4, rxdp += 4) {
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m256i vaddr0_1, vaddr2_3;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-		mb2 = rxep[2].mbuf;
-		mb3 = rxep[3].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-		/*
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3
-		 */
-		vaddr0_1 = _mm256_inserti128_si256(
-				_mm256_castsi128_si256(vaddr0), vaddr1, 1);
-		vaddr2_3 = _mm256_inserti128_si256(
-				_mm256_castsi128_si256(vaddr2), vaddr3, 1);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-		/* add headroom to pa values */
-		dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-		dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-		_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-	}
-
-#endif
-
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	return i40e_rxq_rearm_cmn(rxq, false);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index 862c916..3382947 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -20,7 +20,7 @@
 
 #define RTE_I40E_DESCS_PER_LOOP_AVX 8
 
-static inline void
+static __rte_always_inline void
 i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 {
 	int i;
@@ -32,6 +32,9 @@
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
+	if (!cache)
+		return i40e_rxq_rearm_cmn(rxq, true);
+
 	/* We need to pull 'n' more MBUFs into the software ring from mempool
 	 * We inline the mempool function here, so we can vectorize the copy
 	 * from the cache into the shadow ring.
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index 33cebbe..d4f7688 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -11,6 +11,10 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
 static inline uint16_t
 reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs,
 		   uint16_t nb_bufs, uint8_t *split_flags)
@@ -252,4 +256,201 @@
 	return -1;
 #endif
 }
+
+#ifdef RTE_ARCH_X86
+static __rte_always_inline void
+i40e_rxq_rearm_cmn(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
+{
+	int i;
+	uint16_t rx_id;
+	volatile union i40e_rx_desc *rxdp;
+	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void *)rxep,
+				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+		    rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+				rxep[i].mbuf = &rxq->fake_mbuf;
+				_mm_store_si128((__m128i *)&rxdp[i].read,
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			RTE_I40E_RXQ_REARM_THRESH;
+		return;
+	}
+
+#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+	struct rte_mbuf *mb0, *mb1;
+	__m128i dma_addr0, dma_addr1;
+	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+			RTE_PKTMBUF_HEADROOM);
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+		__m128i vaddr0, vaddr1;
+
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
+
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+		/* add headroom to pa values */
+		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+	}
+#else
+#ifdef CC_AVX512_SUPPORT
+	if (avx512) {
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+		__m512i dma_addr0_3, dma_addr4_7;
+		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+				i += 8, rxep += 8, rxdp += 8) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+			__m256i vaddr0_1, vaddr2_3;
+			__m256i vaddr4_5, vaddr6_7;
+			__m512i vaddr0_3, vaddr4_7;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+			mb4 = rxep[4].mbuf;
+			mb5 = rxep[5].mbuf;
+			mb6 = rxep[6].mbuf;
+			mb7 = rxep[7].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3, and so on.
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+			vaddr4_5 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+							vaddr5, 1);
+			vaddr6_7 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+							vaddr7, 1);
+			vaddr0_3 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+							vaddr2_3, 1);
+			vaddr4_7 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+							vaddr6_7, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+			/* add headroom to pa values */
+			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+		}
+	} else
+#endif
+	{
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		__m256i dma_addr0_1, dma_addr2_3;
+		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+				i += 4, rxep += 4, rxdp += 4) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m256i vaddr0_1, vaddr2_3;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+			/*
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3
+			 */
+			vaddr0_1 = _mm256_inserti128_si256(
+					_mm256_castsi128_si256(vaddr0), vaddr1, 1);
+			vaddr2_3 = _mm256_inserti128_si256(
+					_mm256_castsi128_si256(vaddr2), vaddr3, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+			/* add headroom to pa values */
+			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+		}
+	}
+
+#endif
+
+	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+}
+#endif
+
 #endif
-- 
1.9.3


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [dpdk-stable] [dpdk-dev] [PATCH v3 1/3] net/iavf: fix segment fault in AVX512
  2021-03-30  5:29   ` [dpdk-stable] [PATCH v3 1/3] net/iavf: " Wenzhuo Lu
@ 2021-04-06 12:20     ` Coyle, David
  0 siblings, 0 replies; 14+ messages in thread
From: Coyle, David @ 2021-04-06 12:20 UTC (permalink / raw)
  To: Lu, Wenzhuo, dev; +Cc: Lu, Wenzhuo, stable



> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Wenzhuo Lu
> Sent: Tuesday, March 30, 2021 6:30 AM
> To: dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; stable@dpdk.org
> Subject: [dpdk-dev] [PATCH v3 1/3] net/iavf: fix segment fault in AVX512
>
> Fix segment fault when failing to get the memory from the pool.
>
> Fixes: 31737f2b66fb ("net/iavf: enable AVX512 for legacy Rx")
> Cc: stable@dpdk.org
>
> Reported-by: David Coyle <David.Coyle@intel.com>
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
> ---
>  drivers/net/iavf/iavf_rxtx_vec_avx2.c   | 120 +------------------
>  drivers/net/iavf/iavf_rxtx_vec_avx512.c |   5 +-
>  drivers/net/iavf/iavf_rxtx_vec_common.h | 203
> ++++++++++++++++++++++++++++++++
>  3 files changed, 209 insertions(+), 119 deletions(-)
>

The patch fixes the seg fault, but note I have only tested the default '#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC' path

Tested-by: David Coyle <david.coyle@intel.com>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [dpdk-stable] [dpdk-dev] [PATCH v3 2/3] net/ice: fix segment fault in AVX512
  2021-03-30  5:29   ` [dpdk-stable] [PATCH v3 2/3] net/ice: " Wenzhuo Lu
@ 2021-04-06 12:21     ` Coyle, David
  0 siblings, 0 replies; 14+ messages in thread
From: Coyle, David @ 2021-04-06 12:21 UTC (permalink / raw)
  To: Lu, Wenzhuo, dev; +Cc: Lu, Wenzhuo, stable



> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Wenzhuo Lu
> Sent: Tuesday, March 30, 2021 6:30 AM
> To: dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; stable@dpdk.org
> Subject: [dpdk-dev] [PATCH v3 2/3] net/ice: fix segment fault in AVX512
>
> Fix segment fault when failing to get the memory from the pool.
>
> Fixes: 7f85d5ebcfe1 ("net/ice: add AVX512 vector path")
> Cc: stable@dpdk.org
>
> Reported-by: David Coyle <David.Coyle@intel.com>
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
> ---
>  drivers/net/ice/ice_rxtx_vec_avx2.c   | 120 +-------------------
>  drivers/net/ice/ice_rxtx_vec_avx512.c |   5 +-
>  drivers/net/ice/ice_rxtx_vec_common.h | 203
> ++++++++++++++++++++++++++++++++++
>  3 files changed, 209 insertions(+), 119 deletions(-)
>

The patch fixes the seg fault, but note I have only tested the default '#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC' path

Tested-by: David Coyle <david.coyle@intel.com>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [dpdk-stable] [dpdk-dev] [PATCH v3 3/3] net/i40e: fix segment fault in AVX512
  2021-03-30  5:29   ` [dpdk-stable] [PATCH v3 3/3] net/i40e: " Wenzhuo Lu
@ 2021-04-06 12:22     ` Coyle, David
  0 siblings, 0 replies; 14+ messages in thread
From: Coyle, David @ 2021-04-06 12:22 UTC (permalink / raw)
  To: Lu, Wenzhuo, dev; +Cc: Lu, Wenzhuo, stable



> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Wenzhuo Lu
> Sent: Tuesday, March 30, 2021 6:30 AM
> To: dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; stable@dpdk.org
> Subject: [dpdk-dev] [PATCH v3 3/3] net/i40e: fix segment fault in AVX512
>
> Fix segment fault when failing to get the memory from the pool.
>
> Fixes: e6a6a138919f ("net/i40e: add AVX512 vector path")
> Cc: stable@dpdk.org
>
> Reported-by: David Coyle <David.Coyle@intel.com>
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
> ---
>  drivers/net/i40e/i40e_rxtx_vec_avx2.c   | 117 +------------------
>  drivers/net/i40e/i40e_rxtx_vec_avx512.c |   5 +-
>  drivers/net/i40e/i40e_rxtx_vec_common.h | 201
> ++++++++++++++++++++++++++++++++
>  3 files changed, 207 insertions(+), 116 deletions(-)
>

The patch fixes the seg fault, but note I have only tested the default '#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC ' path

Tested-by: David Coyle <david.coyle@intel.com>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [dpdk-stable] [PATCH v4 1/3] net/iavf: fix segment fault in AVX512
       [not found] ` <1617937317-130223-1-git-send-email-wenzhuo.lu@intel.com>
@ 2021-04-09  3:01   ` Wenzhuo Lu
  2021-04-13 12:37     ` [dpdk-stable] [dpdk-dev] " Ferruh Yigit
  2021-04-09  3:01   ` [dpdk-stable] [PATCH v4 2/3] net/ice: " Wenzhuo Lu
  2021-04-09  3:01   ` [dpdk-stable] [PATCH v4 3/3] net/i40e: " Wenzhuo Lu
  2 siblings, 1 reply; 14+ messages in thread
From: Wenzhuo Lu @ 2021-04-09  3:01 UTC (permalink / raw)
  To: dev; +Cc: Wenzhuo Lu, stable

Fix segment fault when failing to get the memory from the pool.

Fixes: 31737f2b66fb ("net/iavf: enable AVX512 for legacy Rx")
Cc: stable@dpdk.org

Reported-by: David Coyle <David.Coyle@intel.com>
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c   | 120 +------------------
 drivers/net/iavf/iavf_rxtx_vec_avx512.c |   5 +-
 drivers/net/iavf/iavf_rxtx_vec_common.h | 203 ++++++++++++++++++++++++++++++++
 3 files changed, 209 insertions(+), 119 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index cdb5139..2c2b139 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -10,126 +10,10 @@
 #pragma GCC diagnostic ignored "-Wcast-qual"
 #endif
 
-static inline void
+static __rte_always_inline void
 iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxp,
-				 IAVF_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i] = &rxq->fake_mbuf;
-				_mm_store_si128((__m128i *)&rxdp[i].read,
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IAVF_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxp[0];
-		mb1 = rxp[1];
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
-	}
-#else
-	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-	__m256i dma_addr0_1, dma_addr2_3;
-	__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH;
-			i += 4, rxp += 4, rxdp += 4) {
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m256i vaddr0_1, vaddr2_3;
-
-		mb0 = rxp[0];
-		mb1 = rxp[1];
-		mb2 = rxp[2];
-		mb3 = rxp[3];
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3
-		 */
-		vaddr0_1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-						vaddr1, 1);
-		vaddr2_3 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-						vaddr3, 1);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-		/* add headroom to pa values */
-		dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-		dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-		_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-	}
-
-#endif
-
-	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	return iavf_rxq_rearm_cmn(rxq, false);
 }
 
 #define PKTLEN_SHIFT     10
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
index 67184ae..2927a7c 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
@@ -13,7 +13,7 @@
 #define IAVF_DESCS_PER_LOOP_AVX 8
 #define PKTLEN_SHIFT 10
 
-static inline void
+static __rte_always_inline void
 iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 {
 	int i;
@@ -25,6 +25,9 @@
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
+	if (unlikely(!cache))
+		return iavf_rxq_rearm_cmn(rxq, true);
+
 	/* We need to pull 'n' more MBUFs into the software ring from mempool
 	 * We inline the mempool function here, so we can vectorize the copy
 	 * from the cache into the shadow ring.
diff --git a/drivers/net/iavf/iavf_rxtx_vec_common.h b/drivers/net/iavf/iavf_rxtx_vec_common.h
index 46a1873..57b4381 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/iavf/iavf_rxtx_vec_common.h
@@ -11,6 +11,10 @@
 #include "iavf.h"
 #include "iavf_rxtx.h"
 
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
 static inline uint16_t
 reassemble_packets(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_bufs,
 		   uint16_t nb_bufs, uint8_t *split_flags)
@@ -276,4 +280,203 @@
 	return 0;
 }
 
+#ifdef RTE_ARCH_X86
+static __rte_always_inline void
+iavf_rxq_rearm_cmn(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
+{
+	int i;
+	uint16_t rx_id;
+	volatile union iavf_rx_desc *rxdp;
+	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void *)rxp,
+				 IAVF_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
+		    rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
+				rxp[i] = &rxq->fake_mbuf;
+				_mm_store_si128((__m128i *)&rxdp[i].read,
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			IAVF_RXQ_REARM_THRESH;
+		return;
+	}
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+	struct rte_mbuf *mb0, *mb1;
+	__m128i dma_addr0, dma_addr1;
+	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+			RTE_PKTMBUF_HEADROOM);
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
+		__m128i vaddr0, vaddr1;
+
+		mb0 = rxp[0];
+		mb1 = rxp[1];
+
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+		/* add headroom to pa values */
+		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+	}
+#else
+#ifdef CC_AVX512_SUPPORT
+	if (avx512) {
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+		__m512i dma_addr0_3, dma_addr4_7;
+		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
+				i += 8, rxp += 8, rxdp += 8) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+			__m256i vaddr0_1, vaddr2_3;
+			__m256i vaddr4_5, vaddr6_7;
+			__m512i vaddr0_3, vaddr4_7;
+
+			mb0 = rxp[0];
+			mb1 = rxp[1];
+			mb2 = rxp[2];
+			mb3 = rxp[3];
+			mb4 = rxp[4];
+			mb5 = rxp[5];
+			mb6 = rxp[6];
+			mb7 = rxp[7];
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3, and so on.
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+			vaddr4_5 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+							vaddr5, 1);
+			vaddr6_7 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+							vaddr7, 1);
+			vaddr0_3 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+							vaddr2_3, 1);
+			vaddr4_7 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+							vaddr6_7, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+			/* add headroom to pa values */
+			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+		}
+	} else
+#endif
+	{
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		__m256i dma_addr0_1, dma_addr2_3;
+		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
+				i += 4, rxp += 4, rxdp += 4) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m256i vaddr0_1, vaddr2_3;
+
+			mb0 = rxp[0];
+			mb1 = rxp[1];
+			mb2 = rxp[2];
+			mb3 = rxp[3];
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+			/* add headroom to pa values */
+			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+		}
+	}
+
+#endif
+
+	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+}
+#endif
+
 #endif
-- 
1.9.3


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [dpdk-stable] [PATCH v4 2/3] net/ice: fix segment fault in AVX512
       [not found] ` <1617937317-130223-1-git-send-email-wenzhuo.lu@intel.com>
  2021-04-09  3:01   ` [dpdk-stable] [PATCH v4 1/3] net/iavf: " Wenzhuo Lu
@ 2021-04-09  3:01   ` Wenzhuo Lu
  2021-04-13 12:39     ` Ferruh Yigit
  2021-04-09  3:01   ` [dpdk-stable] [PATCH v4 3/3] net/i40e: " Wenzhuo Lu
  2 siblings, 1 reply; 14+ messages in thread
From: Wenzhuo Lu @ 2021-04-09  3:01 UTC (permalink / raw)
  To: dev; +Cc: Wenzhuo Lu, stable

Fix segment fault when failing to get the memory from the pool.

Fixes: 7f85d5ebcfe1 ("net/ice: add AVX512 vector path")
Cc: stable@dpdk.org

Reported-by: David Coyle <David.Coyle@intel.com>
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
---
 drivers/net/ice/ice_rxtx_vec_avx2.c   | 120 +-------------------
 drivers/net/ice/ice_rxtx_vec_avx512.c |   5 +-
 drivers/net/ice/ice_rxtx_vec_common.h | 203 ++++++++++++++++++++++++++++++++++
 3 files changed, 209 insertions(+), 119 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
index 25efd30..7092275 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -10,126 +10,10 @@
 #pragma GCC diagnostic ignored "-Wcast-qual"
 #endif
 
-static inline void
+static __rte_always_inline void
 ice_rxq_rearm(struct ice_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 ICE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128((__m128i *)&rxdp[i].read,
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
-	}
-#else
-	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-	__m256i dma_addr0_1, dma_addr2_3;
-	__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH;
-			i += 4, rxep += 4, rxdp += 4) {
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m256i vaddr0_1, vaddr2_3;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-		mb2 = rxep[2].mbuf;
-		mb3 = rxep[3].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3
-		 */
-		vaddr0_1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-						vaddr1, 1);
-		vaddr2_3 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-						vaddr3, 1);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-		/* add headroom to pa values */
-		dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-		dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-		_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-	}
-
-#endif
-
-	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	return ice_rxq_rearm_cmn(rxq, false);
 }
 
 static inline __m256i
diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c
index 835d0aa..68d4a9b 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
@@ -12,7 +12,7 @@
 
 #define ICE_DESCS_PER_LOOP_AVX 8
 
-static inline void
+static __rte_always_inline void
 ice_rxq_rearm(struct ice_rx_queue *rxq)
 {
 	int i;
@@ -24,6 +24,9 @@
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
+	if (unlikely(!cache))
+		return ice_rxq_rearm_cmn(rxq, true);
+
 	/* We need to pull 'n' more MBUFs into the software ring */
 	if (cache->len < ICE_RXQ_REARM_THRESH) {
 		uint32_t req = ICE_RXQ_REARM_THRESH + (cache->size -
diff --git a/drivers/net/ice/ice_rxtx_vec_common.h b/drivers/net/ice/ice_rxtx_vec_common.h
index c09ac7f..24b6d3e 100644
--- a/drivers/net/ice/ice_rxtx_vec_common.h
+++ b/drivers/net/ice/ice_rxtx_vec_common.h
@@ -7,6 +7,10 @@
 
 #include "ice_rxtx.h"
 
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
 static inline uint16_t
 ice_rx_reassemble_packets(struct ice_rx_queue *rxq, struct rte_mbuf **rx_bufs,
 			  uint16_t nb_bufs, uint8_t *split_flags)
@@ -318,4 +322,203 @@
 	return 0;
 }
 
+#ifdef RTE_ARCH_X86
+static __rte_always_inline void
+ice_rxq_rearm_cmn(struct ice_rx_queue *rxq, __rte_unused bool avx512)
+{
+	int i;
+	uint16_t rx_id;
+	volatile union ice_rx_flex_desc *rxdp;
+	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void *)rxep,
+				 ICE_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
+		    rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
+				rxep[i].mbuf = &rxq->fake_mbuf;
+				_mm_store_si128((__m128i *)&rxdp[i].read,
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			ICE_RXQ_REARM_THRESH;
+		return;
+	}
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+	struct rte_mbuf *mb0, *mb1;
+	__m128i dma_addr0, dma_addr1;
+	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+			RTE_PKTMBUF_HEADROOM);
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+		__m128i vaddr0, vaddr1;
+
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
+
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+		/* add headroom to pa values */
+		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+	}
+#else
+#ifdef CC_AVX512_SUPPORT
+	if (avx512) {
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+		__m512i dma_addr0_3, dma_addr4_7;
+		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+		for (i = 0; i < ICE_RXQ_REARM_THRESH;
+				i += 8, rxep += 8, rxdp += 8) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+			__m256i vaddr0_1, vaddr2_3;
+			__m256i vaddr4_5, vaddr6_7;
+			__m512i vaddr0_3, vaddr4_7;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+			mb4 = rxep[4].mbuf;
+			mb5 = rxep[5].mbuf;
+			mb6 = rxep[6].mbuf;
+			mb7 = rxep[7].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3, and so on.
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+			vaddr4_5 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+							vaddr5, 1);
+			vaddr6_7 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+							vaddr7, 1);
+			vaddr0_3 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+							vaddr2_3, 1);
+			vaddr4_7 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+							vaddr6_7, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+			/* add headroom to pa values */
+			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+		}
+	} else
+#endif
+	{
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		__m256i dma_addr0_1, dma_addr2_3;
+		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+		for (i = 0; i < ICE_RXQ_REARM_THRESH;
+				i += 4, rxep += 4, rxdp += 4) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m256i vaddr0_1, vaddr2_3;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+			/* add headroom to pa values */
+			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+		}
+	}
+
+#endif
+
+	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+}
+#endif
+
 #endif
-- 
1.9.3


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [dpdk-stable] [PATCH v4 3/3] net/i40e: fix segment fault in AVX512
       [not found] ` <1617937317-130223-1-git-send-email-wenzhuo.lu@intel.com>
  2021-04-09  3:01   ` [dpdk-stable] [PATCH v4 1/3] net/iavf: " Wenzhuo Lu
  2021-04-09  3:01   ` [dpdk-stable] [PATCH v4 2/3] net/ice: " Wenzhuo Lu
@ 2021-04-09  3:01   ` Wenzhuo Lu
  2 siblings, 0 replies; 14+ messages in thread
From: Wenzhuo Lu @ 2021-04-09  3:01 UTC (permalink / raw)
  To: dev; +Cc: Wenzhuo Lu, stable

Fix segment fault when failing to get the memory from the pool.

Fixes: e6a6a138919f ("net/i40e: add AVX512 vector path")
Cc: stable@dpdk.org

Reported-by: David Coyle <David.Coyle@intel.com>
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
---
 drivers/net/i40e/i40e_rxtx_vec_avx2.c   | 117 +------------------
 drivers/net/i40e/i40e_rxtx_vec_avx512.c |   5 +-
 drivers/net/i40e/i40e_rxtx_vec_common.h | 201 ++++++++++++++++++++++++++++++++
 3 files changed, 207 insertions(+), 116 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
index 15abd9d..133e2fb 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
@@ -18,123 +18,10 @@
 #pragma GCC diagnostic ignored "-Wcast-qual"
 #endif
 
-static inline void
+static __rte_always_inline void
 i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128((__m128i *)&rxdp[i].read,
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
-		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
-	}
-#else
-	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-	__m256i dma_addr0_1, dma_addr2_3;
-	__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
-			i += 4, rxep += 4, rxdp += 4) {
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m256i vaddr0_1, vaddr2_3;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-		mb2 = rxep[2].mbuf;
-		mb3 = rxep[3].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-		/*
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3
-		 */
-		vaddr0_1 = _mm256_inserti128_si256(
-				_mm256_castsi128_si256(vaddr0), vaddr1, 1);
-		vaddr2_3 = _mm256_inserti128_si256(
-				_mm256_castsi128_si256(vaddr2), vaddr3, 1);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-		/* add headroom to pa values */
-		dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-		dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-		_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-	}
-
-#endif
-
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	return i40e_rxq_rearm_cmn(rxq, false);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index 862c916..e02fdd4 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -20,7 +20,7 @@
 
 #define RTE_I40E_DESCS_PER_LOOP_AVX 8
 
-static inline void
+static __rte_always_inline void
 i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 {
 	int i;
@@ -32,6 +32,9 @@
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
+	if (unlikely(!cache))
+		return i40e_rxq_rearm_cmn(rxq, true);
+
 	/* We need to pull 'n' more MBUFs into the software ring from mempool
 	 * We inline the mempool function here, so we can vectorize the copy
 	 * from the cache into the shadow ring.
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index 33cebbe..d4f7688 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -11,6 +11,10 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
 static inline uint16_t
 reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs,
 		   uint16_t nb_bufs, uint8_t *split_flags)
@@ -252,4 +256,201 @@
 	return -1;
 #endif
 }
+
+#ifdef RTE_ARCH_X86
+static __rte_always_inline void
+i40e_rxq_rearm_cmn(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
+{
+	int i;
+	uint16_t rx_id;
+	volatile union i40e_rx_desc *rxdp;
+	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void *)rxep,
+				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+		    rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+				rxep[i].mbuf = &rxq->fake_mbuf;
+				_mm_store_si128((__m128i *)&rxdp[i].read,
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			RTE_I40E_RXQ_REARM_THRESH;
+		return;
+	}
+
+#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+	struct rte_mbuf *mb0, *mb1;
+	__m128i dma_addr0, dma_addr1;
+	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+			RTE_PKTMBUF_HEADROOM);
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+		__m128i vaddr0, vaddr1;
+
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
+
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+		/* add headroom to pa values */
+		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+	}
+#else
+#ifdef CC_AVX512_SUPPORT
+	if (avx512) {
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+		__m512i dma_addr0_3, dma_addr4_7;
+		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+				i += 8, rxep += 8, rxdp += 8) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+			__m256i vaddr0_1, vaddr2_3;
+			__m256i vaddr4_5, vaddr6_7;
+			__m512i vaddr0_3, vaddr4_7;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+			mb4 = rxep[4].mbuf;
+			mb5 = rxep[5].mbuf;
+			mb6 = rxep[6].mbuf;
+			mb7 = rxep[7].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+			/**
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3, and so on.
+			 */
+			vaddr0_1 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+							vaddr1, 1);
+			vaddr2_3 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+							vaddr3, 1);
+			vaddr4_5 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+							vaddr5, 1);
+			vaddr6_7 =
+				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+							vaddr7, 1);
+			vaddr0_3 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+							vaddr2_3, 1);
+			vaddr4_7 =
+				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+							vaddr6_7, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+			/* add headroom to pa values */
+			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+		}
+	} else
+#endif
+	{
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		__m256i dma_addr0_1, dma_addr2_3;
+		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+				i += 4, rxep += 4, rxdp += 4) {
+			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+			__m256i vaddr0_1, vaddr2_3;
+
+			mb0 = rxep[0].mbuf;
+			mb1 = rxep[1].mbuf;
+			mb2 = rxep[2].mbuf;
+			mb3 = rxep[3].mbuf;
+
+			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+					offsetof(struct rte_mbuf, buf_addr) + 8);
+			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+			/*
+			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+			 * into the high lanes. Similarly for 2 & 3
+			 */
+			vaddr0_1 = _mm256_inserti128_si256(
+					_mm256_castsi128_si256(vaddr0), vaddr1, 1);
+			vaddr2_3 = _mm256_inserti128_si256(
+					_mm256_castsi128_si256(vaddr2), vaddr3, 1);
+
+			/* convert pa to dma_addr hdr/data */
+			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+			/* add headroom to pa values */
+			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+			/* flush desc with pa dma_addr */
+			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+		}
+	}
+
+#endif
+
+	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+}
+#endif
+
 #endif
-- 
1.9.3


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [dpdk-stable] [dpdk-dev] [PATCH v4 1/3] net/iavf: fix segment fault in AVX512
  2021-04-09  3:01   ` [dpdk-stable] [PATCH v4 1/3] net/iavf: " Wenzhuo Lu
@ 2021-04-13 12:37     ` Ferruh Yigit
  0 siblings, 0 replies; 14+ messages in thread
From: Ferruh Yigit @ 2021-04-13 12:37 UTC (permalink / raw)
  To: Wenzhuo Lu, dev, Bruce Richardson; +Cc: stable

On 4/9/2021 4:01 AM, Wenzhuo Lu wrote:
> Fix segment fault when failing to get the memory from the pool.
> 

Can be good to clarify there is no change in the moved code, it is not possible 
to recognize this from patch without using a compare app.

> Fixes: 31737f2b66fb ("net/iavf: enable AVX512 for legacy Rx")
> Cc: stable@dpdk.org
> 
> Reported-by: David Coyle <David.Coyle@intel.com>
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
> ---
>   drivers/net/iavf/iavf_rxtx_vec_avx2.c   | 120 +------------------
>   drivers/net/iavf/iavf_rxtx_vec_avx512.c |   5 +-
>   drivers/net/iavf/iavf_rxtx_vec_common.h | 203 ++++++++++++++++++++++++++++++++

The common vector code seems moved to 'iavf_rxtx_vec_common.h' but that header 
is included by 'iavf_rxtx_vec_sse.c' too, and the moved function has AVX2 code 
in it.
Won't this fail to build if the AVX2 is not enabled?

Bruce, is there an easy way to test this, meson detects the AVX2 support even I 
provide c_args march that doesn't have AVX2 support.

<...>

> index 46a1873..57b4381 100644
> --- a/drivers/net/iavf/iavf_rxtx_vec_common.h
> +++ b/drivers/net/iavf/iavf_rxtx_vec_common.h
> @@ -11,6 +11,10 @@
>   #include "iavf.h"
>   #include "iavf_rxtx.h"
>   
> +#ifndef __INTEL_COMPILER
> +#pragma GCC diagnostic ignored "-Wcast-qual"
> +#endif
> +

Is this pragma needed or carried here to be sure?

>   static inline uint16_t
>   reassemble_packets(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_bufs,
>   		   uint16_t nb_bufs, uint8_t *split_flags)
> @@ -276,4 +280,203 @@
>   	return 0;
>   }
>   
> +#ifdef RTE_ARCH_X86
> +static __rte_always_inline void
> +iavf_rxq_rearm_cmn(struct iavf_rx_queue *rxq, __rte_unused bool avx512)

What do you think expand 'cmn' to full 'common', it is clear from this patch 
what it stands for but later if you just look this function it is not that clear 
if it is an abbreviation for something else or common.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [dpdk-stable] [PATCH v4 2/3] net/ice: fix segment fault in AVX512
  2021-04-09  3:01   ` [dpdk-stable] [PATCH v4 2/3] net/ice: " Wenzhuo Lu
@ 2021-04-13 12:39     ` Ferruh Yigit
  0 siblings, 0 replies; 14+ messages in thread
From: Ferruh Yigit @ 2021-04-13 12:39 UTC (permalink / raw)
  To: Wenzhuo Lu, dev; +Cc: stable

On 4/9/2021 4:01 AM, Wenzhuo Lu wrote:
> Fix segment fault when failing to get the memory from the pool.
> 
> Fixes: 7f85d5ebcfe1 ("net/ice: add AVX512 vector path")
> Cc: stable@dpdk.org
> 
> Reported-by: David Coyle <David.Coyle@intel.com>

Can you please add the email in consistent format, email part all lowercase?

Thanks.

> Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>

<...>


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2021-04-13 12:39 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1617074128-50681-1-git-send-email-wenzhuo.lu@intel.com>
2021-03-30  3:15 ` [dpdk-stable] [PATCH v2 1/3] net/iavf: fix segment fault in AVX512 Wenzhuo Lu
2021-03-30  3:15 ` [dpdk-stable] [PATCH v2 2/3] net/ice: " Wenzhuo Lu
2021-03-30  3:15 ` [dpdk-stable] [PATCH v2 3/3] net/i40e: " Wenzhuo Lu
     [not found] ` <1617082176-51391-1-git-send-email-wenzhuo.lu@intel.com>
2021-03-30  5:29   ` [dpdk-stable] [PATCH v3 1/3] net/iavf: " Wenzhuo Lu
2021-04-06 12:20     ` [dpdk-stable] [dpdk-dev] " Coyle, David
2021-03-30  5:29   ` [dpdk-stable] [PATCH v3 2/3] net/ice: " Wenzhuo Lu
2021-04-06 12:21     ` [dpdk-stable] [dpdk-dev] " Coyle, David
2021-03-30  5:29   ` [dpdk-stable] [PATCH v3 3/3] net/i40e: " Wenzhuo Lu
2021-04-06 12:22     ` [dpdk-stable] [dpdk-dev] " Coyle, David
     [not found] ` <1617937317-130223-1-git-send-email-wenzhuo.lu@intel.com>
2021-04-09  3:01   ` [dpdk-stable] [PATCH v4 1/3] net/iavf: " Wenzhuo Lu
2021-04-13 12:37     ` [dpdk-stable] [dpdk-dev] " Ferruh Yigit
2021-04-09  3:01   ` [dpdk-stable] [PATCH v4 2/3] net/ice: " Wenzhuo Lu
2021-04-13 12:39     ` Ferruh Yigit
2021-04-09  3:01   ` [dpdk-stable] [PATCH v4 3/3] net/i40e: " Wenzhuo Lu

patches for DPDK stable branches

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://inbox.dpdk.org/stable/0 stable/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 stable stable/ http://inbox.dpdk.org/stable \
		stable@dpdk.org
	public-inbox-index stable

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.stable


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git