From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org, Bruce Richardson <bruce.richardson@intel.com>
Subject: [PATCH v4 23/25] net/intel: support wider x86 vectors for Rx rearm
Date: Fri, 30 May 2025 14:57:19 +0100 [thread overview]
Message-ID: <b7864d189e468484a6af5aa774ed566b2a37818f.1748612804.git.anatoly.burakov@intel.com> (raw)
In-Reply-To: <cover.1748612803.git.anatoly.burakov@intel.com> <cover.1748612803.git.anatoly.burakov@intel.com>
Currently, for the 32-byte descriptor format, only the SSE instruction set
is supported. Add implementations for the AVX2 and AVX512 instruction sets.
Since we are using Rx descriptor definitions from common code, we can just
use the generic descriptor definition, as we only ever write the first 16
bytes of it, and the layout is always the same for that part.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
Notes:
v3 -> v4:
- Use the common descriptor format instead of constant propagation
- Syntax and whitespace cleanups
drivers/net/intel/common/rx_vec_x86.h | 339 ++++++++++++++------------
1 file changed, 183 insertions(+), 156 deletions(-)
diff --git a/drivers/net/intel/common/rx_vec_x86.h b/drivers/net/intel/common/rx_vec_x86.h
index 7c57016df7..43f7c59449 100644
--- a/drivers/net/intel/common/rx_vec_x86.h
+++ b/drivers/net/intel/common/rx_vec_x86.h
@@ -43,206 +43,244 @@ _ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq)
return 0;
}
-/*
- * SSE code path can handle both 16-byte and 32-byte descriptors with one code
- * path, as we only ever write 16 bytes at a time.
- */
-static __rte_always_inline void
-_ci_rxq_rearm_sse(struct ci_rx_queue *rxq)
+static __rte_always_inline __m128i
+_ci_rxq_rearm_desc_sse(const __m128i vaddr)
{
const __m128i hdroom = _mm_set1_epi64x(RTE_PKTMBUF_HEADROOM);
const __m128i zero = _mm_setzero_si128();
+
+ /* add headroom to both 64-bit halves of the address pair */
+ __m128i reg = _mm_add_epi64(vaddr, hdroom);
+
+#if RTE_IOVA_IN_MBUF
+ /* vaddr is expected to hold buf_addr (lo 64bit) and buf_iova (hi 64bit), as loaded by the caller */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ /* move IOVA (hi 64bit) to Packet Buffer Address, erase Header Buffer Address */
+ reg = _mm_unpackhi_epi64(reg, zero);
+#else
+ /* keep buf_addr (lo 64bit) as Packet Buffer Address, erase Header Buffer Address */
+ reg = _mm_unpacklo_epi64(reg, zero);
+#endif
+ return reg;
+}
+
+static __rte_always_inline void
+_ci_rxq_rearm_sse(struct ci_rx_queue *rxq)
+{
const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+ /* each 16-byte SSE store writes to exactly one descriptor, whatever the descriptor size */
+ const uint8_t desc_per_reg = 1;
+ const uint8_t desc_per_iter = desc_per_reg * 2;
volatile union ci_rx_desc *rxdp;
int i;
rxdp = &rxq->rx_ring[rxq->rxrearm_start];
/* Initialize the mbufs in vector, process 2 mbufs in one loop */
- for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp += 2) {
+ for (i = 0; i < rearm_thresh;
+ i += desc_per_iter,
+ rxp += desc_per_iter,
+ rxdp += desc_per_iter) {
struct rte_mbuf *mb0 = rxp[0].mbuf;
struct rte_mbuf *mb1 = rxp[1].mbuf;
-#if RTE_IOVA_IN_MBUF
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
- __m128i addr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- __m128i addr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+ const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+ const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
- /* add headroom to address values */
- addr0 = _mm_add_epi64(addr0, hdroom);
- addr1 = _mm_add_epi64(addr1, hdroom);
+ const __m128i reg0 = _ci_rxq_rearm_desc_sse(vaddr0);
+ const __m128i reg1 = _ci_rxq_rearm_desc_sse(vaddr1);
-#if RTE_IOVA_IN_MBUF
- /* move IOVA to Packet Buffer Address, erase Header Buffer Address */
- addr0 = _mm_unpackhi_epi64(addr0, zero);
- addr0 = _mm_unpackhi_epi64(addr1, zero);
-#else
- /* erase Header Buffer Address */
- addr0 = _mm_unpacklo_epi64(addr0, zero);
- addr1 = _mm_unpacklo_epi64(addr1, zero);
-#endif
-
- /* flush desc with pa dma_addr */
- _mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[0]), addr0);
- _mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[1]), addr1);
+ /* write both descriptors back to the Rx ring */
+ _mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[0]), reg0);
+ _mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[1]), reg1);
}
}
-#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
#ifdef __AVX2__
-/* AVX2 version for 16-byte descriptors, handles 4 buffers at a time */
-static __rte_always_inline void
-_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq)
+static __rte_always_inline __m256i
+_ci_rxq_rearm_desc_avx2(const __m128i vaddr0, const __m128i vaddr1)
{
- struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
- const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
- const __m256i hdroom = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+ const __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
const __m256i zero = _mm256_setzero_si256();
+
+ /* merge: vaddr0 is cast to 256-bit (low 128 bits), vaddr1 is inserted into the high 128 bits */
+ __m256i reg = _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0), vaddr1, 1);
+
+ /* add headroom to all four 64-bit values (this includes lanes where the caller passed zero) */
+ reg = _mm256_add_epi64(reg, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+ /* each vaddr is expected to hold buf_addr (lo 64bit) and buf_iova (hi 64bit), as loaded by the caller */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ /* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+ reg = _mm256_unpackhi_epi64(reg, zero);
+#else
+ /* keep buf_addr as Packet Buffer Address, erase Header Buffer Address */
+ reg = _mm256_unpacklo_epi64(reg, zero);
+#endif
+ return reg;
+}
+
+/*
+ * AVX2 rearm: fill CI_VPMD_RX_REARM_THRESH descriptors starting at
+ * rxq->rxrearm_start, using two 256-bit stores per loop iteration.
+ * One register holds sizeof(__m256i) / sizeof(union ci_rx_desc) descriptors,
+ * so the second store of an iteration must be placed desc_per_reg
+ * descriptors after the first one.
+ */
+static __rte_always_inline void
+_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq)
+{
+ struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+ const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+ /* how many descriptors can fit into a register */
+ const uint8_t desc_per_reg = sizeof(__m256i) / sizeof(union ci_rx_desc);
+ /* how many descriptors are written in one loop iteration (two registers) */
+ const uint8_t desc_per_iter = desc_per_reg * 2;
volatile union ci_rx_desc *rxdp;
int i;
- RTE_BUILD_BUG_ON(sizeof(union ci_rx_desc) != 16);
-
rxdp = &rxq->rx_ring[rxq->rxrearm_start];
- /* Initialize the mbufs in vector, process 4 mbufs in one loop */
- for (i = 0; i < rearm_thresh; i += 4, rxp += 4, rxdp += 4) {
- struct rte_mbuf *mb0 = rxp[0].mbuf;
- struct rte_mbuf *mb1 = rxp[1].mbuf;
- struct rte_mbuf *mb2 = rxp[2].mbuf;
- struct rte_mbuf *mb3 = rxp[3].mbuf;
+ /* Initialize the mbufs in vector, process 2 or 4 mbufs in one loop */
+ for (i = 0; i < rearm_thresh;
+ i += desc_per_iter,
+ rxp += desc_per_iter,
+ rxdp += desc_per_iter) {
+ __m256i reg0, reg1;
-#if RTE_IOVA_IN_MBUF
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
- const __m128i vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- const __m128i vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
- const __m128i vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
- const __m128i vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+ if (desc_per_iter == 2) {
+ /*
+ * One 32-byte descriptor per register, two registers: the address
+ * goes into the low 16 bytes, the upper 16 bytes are built from a
+ * zero vector.
+ * NOTE(review): the helper adds headroom to every lane before the
+ * unpack, so the first qword of the upper 16 bytes is written as
+ * RTE_PKTMBUF_HEADROOM rather than 0 - confirm hardware ignores it.
+ */
+ const __m128i zero = _mm_setzero_si128();
+ const struct rte_mbuf *mb0 = rxp[0].mbuf;
+ const struct rte_mbuf *mb1 = rxp[1].mbuf;
- /**
- * merge 0 & 1, by casting 0 to 256-bit and inserting 1
- * into the high lanes. Similarly for 2 & 3
- */
- const __m256i vaddr0_256 = _mm256_castsi128_si256(vaddr0);
- const __m256i vaddr2_256 = _mm256_castsi128_si256(vaddr2);
+ const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+ const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
- __m256i addr0_1 = _mm256_inserti128_si256(vaddr0_256, vaddr1, 1);
- __m256i addr2_3 = _mm256_inserti128_si256(vaddr2_256, vaddr3, 1);
+ reg0 = _ci_rxq_rearm_desc_avx2(vaddr0, zero);
+ reg1 = _ci_rxq_rearm_desc_avx2(vaddr1, zero);
+ } else {
+ /* two 16-byte descriptors per register, two registers */
+ const struct rte_mbuf *mb0 = rxp[0].mbuf;
+ const struct rte_mbuf *mb1 = rxp[1].mbuf;
+ const struct rte_mbuf *mb2 = rxp[2].mbuf;
+ const struct rte_mbuf *mb3 = rxp[3].mbuf;
- /* add headroom to address values */
- addr0_1 = _mm256_add_epi64(addr0_1, hdroom);
- addr0_1 = _mm256_add_epi64(addr0_1, hdroom);
+ const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+ const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+ const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+ const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
-#if RTE_IOVA_IN_MBUF
- /* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
- addr0_1 = _mm256_unpackhi_epi64(addr0_1, zero);
- addr2_3 = _mm256_unpackhi_epi64(addr2_3, zero);
-#else
- /* erase Header Buffer Address */
- addr0_1 = _mm256_unpacklo_epi64(addr0_1, zero);
- addr2_3 = _mm256_unpacklo_epi64(addr2_3, zero);
-#endif
+ reg0 = _ci_rxq_rearm_desc_avx2(vaddr0, vaddr1);
+ reg1 = _ci_rxq_rearm_desc_avx2(vaddr2, vaddr3);
+ }
- /* flush desc with pa dma_addr */
- _mm256_store_si256(RTE_CAST_PTR(__m256i *, &rxdp[0]), addr0_1);
- _mm256_store_si256(RTE_CAST_PTR(__m256i *, &rxdp[2]), addr2_3);
+ /*
+ * flush descriptors; reg1 starts desc_per_reg descriptors after reg0
+ * (index 1 for 32-byte descriptors, index 2 for 16-byte descriptors)
+ */
+ _mm256_store_si256(RTE_CAST_PTR(__m256i *, &rxdp[0]), reg0);
+ _mm256_store_si256(RTE_CAST_PTR(__m256i *, &rxdp[desc_per_reg]), reg1);
}
}
#endif /* __AVX2__ */
#ifdef __AVX512VL__
-/* AVX512 version for 16-byte descriptors, handles 8 buffers at a time */
-static __rte_always_inline void
-_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq)
+static __rte_always_inline __m512i
+_ci_rxq_rearm_desc_avx512(const __m128i vaddr0, const __m128i vaddr1,
+ const __m128i vaddr2, const __m128i vaddr3)
{
- struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
- const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
- const __m512i hdroom = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
const __m512i zero = _mm512_setzero_si512();
+ const __m512i hdroom = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+
+ /**
+ * merge 0 & 1, by casting 0 to 256-bit and inserting 1 into the high
+ * lanes. Similarly for 2 & 3.
+ */
+ const __m256i vaddr0_1 = _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0), vaddr1, 1);
+ const __m256i vaddr2_3 = _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2), vaddr3, 1);
+ /*
+ * merge 0+1 & 2+3, by casting 0+1 to 512-bit and inserting 2+3 into the
+ * high lanes.
+ */
+ __m512i reg = _mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1), vaddr2_3, 1);
+
+ /* add headroom to all eight 64-bit values (this includes lanes where the caller passed zero) */
+ reg = _mm512_add_epi64(reg, hdroom);
+
+#if RTE_IOVA_IN_MBUF
+ /* each vaddr is expected to hold buf_addr (lo 64bit) and buf_iova (hi 64bit), as loaded by the caller */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ /* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+ reg = _mm512_unpackhi_epi64(reg, zero);
+#else
+ /* keep buf_addr as Packet Buffer Address, erase Header Buffer Address */
+ reg = _mm512_unpacklo_epi64(reg, zero);
+#endif
+ return reg;
+}
+
+/*
+ * AVX512 rearm: fill CI_VPMD_RX_REARM_THRESH descriptors starting at
+ * rxq->rxrearm_start, using two 512-bit stores per loop iteration.
+ * One register holds sizeof(__m512i) / sizeof(union ci_rx_desc) descriptors,
+ * so the second store of an iteration must be placed desc_per_reg
+ * descriptors after the first one.
+ */
+static __rte_always_inline void
+_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq)
+{
+ struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+ const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+ /* how many descriptors can fit into a register */
+ const uint8_t desc_per_reg = sizeof(__m512i) / sizeof(union ci_rx_desc);
+ /* how many descriptors are written in one loop iteration (two registers) */
+ const uint8_t desc_per_iter = desc_per_reg * 2;
volatile union ci_rx_desc *rxdp;
int i;
- RTE_BUILD_BUG_ON(sizeof(union ci_rx_desc) != 16);
-
rxdp = &rxq->rx_ring[rxq->rxrearm_start];
- /* Initialize the mbufs in vector, process 8 mbufs in one loop */
- for (i = 0; i < rearm_thresh; i += 8, rxp += 8, rxdp += 8) {
- struct rte_mbuf *mb0 = rxp[0].mbuf;
- struct rte_mbuf *mb1 = rxp[1].mbuf;
- struct rte_mbuf *mb2 = rxp[2].mbuf;
- struct rte_mbuf *mb3 = rxp[3].mbuf;
- struct rte_mbuf *mb4 = rxp[4].mbuf;
- struct rte_mbuf *mb5 = rxp[5].mbuf;
- struct rte_mbuf *mb6 = rxp[6].mbuf;
- struct rte_mbuf *mb7 = rxp[7].mbuf;
+ /* Initialize the mbufs in vector, process 4 or 8 mbufs in one loop */
+ for (i = 0; i < rearm_thresh;
+ i += desc_per_iter,
+ rxp += desc_per_iter,
+ rxdp += desc_per_iter) {
+ __m512i reg0, reg1;
-#if RTE_IOVA_IN_MBUF
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
- const __m128i vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- const __m128i vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
- const __m128i vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
- const __m128i vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
- const __m128i vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
- const __m128i vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
- const __m128i vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
- const __m128i vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+ if (desc_per_iter == 4) {
+ /*
+ * Two 32-byte descriptors per register, two registers: each address
+ * goes into the low 16 bytes of its descriptor, the upper 16 bytes
+ * are built from a zero vector.
+ * NOTE(review): the helper adds headroom to every lane before the
+ * unpack, so the first qword of each upper 16 bytes is written as
+ * RTE_PKTMBUF_HEADROOM rather than 0 - confirm hardware ignores it.
+ */
+ const __m128i zero = _mm_setzero_si128();
+ const struct rte_mbuf *mb0 = rxp[0].mbuf;
+ const struct rte_mbuf *mb1 = rxp[1].mbuf;
+ const struct rte_mbuf *mb2 = rxp[2].mbuf;
+ const struct rte_mbuf *mb3 = rxp[3].mbuf;
- /**
- * merge 0 & 1, by casting 0 to 256-bit and inserting 1
- * into the high lanes. Similarly for 2 & 3, and so on.
- */
- const __m256i addr0_256 = _mm256_castsi128_si256(vaddr0);
- const __m256i addr2_256 = _mm256_castsi128_si256(vaddr2);
- const __m256i addr4_256 = _mm256_castsi128_si256(vaddr4);
- const __m256i addr6_256 = _mm256_castsi128_si256(vaddr6);
+ const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+ const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+ const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+ const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
- const __m256i addr0_1 = _mm256_inserti128_si256(addr0_256, vaddr1, 1);
- const __m256i addr2_3 = _mm256_inserti128_si256(addr2_256, vaddr3, 1);
- const __m256i addr4_5 = _mm256_inserti128_si256(addr4_256, vaddr5, 1);
- const __m256i addr6_7 = _mm256_inserti128_si256(addr6_256, vaddr7, 1);
+ reg0 = _ci_rxq_rearm_desc_avx512(vaddr0, zero, vaddr1, zero);
+ reg1 = _ci_rxq_rearm_desc_avx512(vaddr2, zero, vaddr3, zero);
+ } else {
+ /* four 16-byte descriptors per register, two registers */
+ const struct rte_mbuf *mb0 = rxp[0].mbuf;
+ const struct rte_mbuf *mb1 = rxp[1].mbuf;
+ const struct rte_mbuf *mb2 = rxp[2].mbuf;
+ const struct rte_mbuf *mb3 = rxp[3].mbuf;
+ const struct rte_mbuf *mb4 = rxp[4].mbuf;
+ const struct rte_mbuf *mb5 = rxp[5].mbuf;
+ const struct rte_mbuf *mb6 = rxp[6].mbuf;
+ const struct rte_mbuf *mb7 = rxp[7].mbuf;
- /**
- * merge 0_1 & 2_3, by casting 0_1 to 512-bit and inserting 2_3
- * into the high lanes. Similarly for 4_5 & 6_7, and so on.
- */
- const __m512i addr0_1_512 = _mm512_castsi256_si512(addr0_1);
- const __m512i addr4_5_512 = _mm512_castsi256_si512(addr4_5);
+ const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+ const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+ const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+ const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
+ const __m128i vaddr4 = _mm_loadu_si128((const __m128i *)&mb4->buf_addr);
+ const __m128i vaddr5 = _mm_loadu_si128((const __m128i *)&mb5->buf_addr);
+ const __m128i vaddr6 = _mm_loadu_si128((const __m128i *)&mb6->buf_addr);
+ const __m128i vaddr7 = _mm_loadu_si128((const __m128i *)&mb7->buf_addr);
- __m512i addr0_3 = _mm512_inserti64x4(addr0_1_512, addr2_3, 1);
- __m512i addr4_7 = _mm512_inserti64x4(addr4_5_512, addr6_7, 1);
-
- /* add headroom to address values */
- addr0_3 = _mm512_add_epi64(addr0_3, hdroom);
- addr4_7 = _mm512_add_epi64(addr4_7, hdroom);
-
-#if RTE_IOVA_IN_MBUF
- /* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
- addr0_3 = _mm512_unpackhi_epi64(addr0_3, zero);
- addr4_7 = _mm512_unpackhi_epi64(addr4_7, zero);
-#else
- /* erase Header Buffer Address */
- addr0_3 = _mm512_unpacklo_epi64(addr0_3, zero);
- addr4_7 = _mm512_unpacklo_epi64(addr4_7, zero);
-#endif
+ reg0 = _ci_rxq_rearm_desc_avx512(vaddr0, vaddr1, vaddr2, vaddr3);
+ reg1 = _ci_rxq_rearm_desc_avx512(vaddr4, vaddr5, vaddr6, vaddr7);
+ }
/* flush desc with pa dma_addr */
- _mm512_store_si512(RTE_CAST_PTR(__m512i *, &rxdp[0]), addr0_3);
- _mm512_store_si512(RTE_CAST_PTR(__m512i *, &rxdp[4]), addr4_7);
+ /*
+ * reg1 starts desc_per_reg descriptors after reg0
+ * (index 2 for 32-byte descriptors, index 4 for 16-byte descriptors)
+ */
+ _mm512_store_si512(RTE_CAST_PTR(__m512i *, &rxdp[0]), reg0);
+ _mm512_store_si512(RTE_CAST_PTR(__m512i *, &rxdp[desc_per_reg]), reg1);
}
}
#endif /* __AVX512VL__ */
-#endif /* RTE_NET_INTEL_USE_16BYTE_DESC */
static __rte_always_inline void
ci_rxq_rearm(struct ci_rx_queue *rxq, const enum ci_rx_vec_level vec_level)
@@ -254,7 +292,6 @@ ci_rxq_rearm(struct ci_rx_queue *rxq, const enum ci_rx_vec_level vec_level)
if (_ci_rxq_rearm_get_bufs(rxq) < 0)
return;
-#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
switch (vec_level) {
case CI_RX_VEC_LEVEL_AVX512:
#ifdef __AVX512VL__
@@ -272,20 +309,10 @@ ci_rxq_rearm(struct ci_rx_queue *rxq, const enum ci_rx_vec_level vec_level)
/* fall back to SSE */
/* fall through */
#endif
- case CI_RX_VEC_LEVEL_SSE:
- _ci_rxq_rearm_sse(rxq, desc_len);
- break;
- }
-#else
- /* for 32-byte descriptors only support SSE */
- switch (vec_level) {
- case CI_RX_VEC_LEVEL_AVX512:
- case CI_RX_VEC_LEVEL_AVX2:
case CI_RX_VEC_LEVEL_SSE:
_ci_rxq_rearm_sse(rxq);
break;
}
-#endif /* RTE_NET_INTEL_USE_16BYTE_DESC */
rxq->rxrearm_start += rearm_thresh;
if (rxq->rxrearm_start >= rxq->nb_rx_desc)
--
2.47.1
next prev parent reply other threads:[~2025-05-30 14:00 UTC|newest]
Thread overview: 82+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 04/13] net/i40e: use the " Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 05/13] net/ice: " Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 06/13] net/iavf: " Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 09/13] net/iavf: " Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 10/13] net/ixgbe: " Anatoly Burakov
2025-05-06 13:28 ` [PATCH v1 11/13] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
2025-05-06 13:28 ` [PATCH v1 12/13] net/intel: add common Rx mbuf recycle Anatoly Burakov
2025-05-06 13:28 ` [PATCH v1 13/13] net/intel: add common Tx " Anatoly Burakov
2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
2025-05-12 10:58 ` [PATCH v2 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
2025-05-12 10:58 ` [PATCH v2 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
2025-05-12 10:58 ` [PATCH v2 04/13] net/i40e: use the " Anatoly Burakov
2025-05-12 10:58 ` [PATCH v2 05/13] net/ice: " Anatoly Burakov
2025-05-12 10:58 ` [PATCH v2 06/13] net/iavf: " Anatoly Burakov
2025-05-12 10:58 ` [PATCH v2 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
2025-05-12 10:58 ` [PATCH v2 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
2025-05-12 10:58 ` [PATCH v2 09/13] net/iavf: " Anatoly Burakov
2025-05-12 10:58 ` [PATCH v2 10/13] net/ixgbe: " Anatoly Burakov
2025-05-12 10:58 ` [PATCH v2 11/13] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
2025-05-12 10:58 ` [PATCH v2 12/13] net/intel: add common Rx mbuf recycle Anatoly Burakov
2025-05-12 10:58 ` [PATCH v2 13/13] net/intel: add common Tx " Anatoly Burakov
2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
2025-05-12 12:54 ` [PATCH v3 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
2025-05-14 16:39 ` Bruce Richardson
2025-05-12 12:54 ` [PATCH v3 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
2025-05-14 16:45 ` Bruce Richardson
2025-05-12 12:54 ` [PATCH v3 04/13] net/i40e: use the " Anatoly Burakov
2025-05-14 16:52 ` Bruce Richardson
2025-05-15 11:09 ` Burakov, Anatoly
2025-05-15 12:55 ` Bruce Richardson
2025-05-12 12:54 ` [PATCH v3 05/13] net/ice: " Anatoly Burakov
2025-05-14 16:56 ` Bruce Richardson
2025-05-23 11:16 ` Burakov, Anatoly
2025-05-12 12:54 ` [PATCH v3 06/13] net/iavf: " Anatoly Burakov
2025-05-15 10:59 ` Bruce Richardson
2025-05-15 11:11 ` Burakov, Anatoly
2025-05-15 12:57 ` Bruce Richardson
2025-05-12 12:54 ` [PATCH v3 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
2025-05-15 10:56 ` Bruce Richardson
2025-05-12 12:54 ` [PATCH v3 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
2025-05-15 10:58 ` Bruce Richardson
2025-05-12 12:54 ` [PATCH v3 09/13] net/iavf: " Anatoly Burakov
2025-05-12 12:54 ` [PATCH v3 10/13] net/ixgbe: " Anatoly Burakov
2025-05-12 12:54 ` [PATCH v3 11/13] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
2025-05-12 12:54 ` [PATCH v3 12/13] net/intel: add common Rx mbuf recycle Anatoly Burakov
2025-05-12 12:54 ` [PATCH v3 13/13] net/intel: add common Tx " Anatoly Burakov
2025-05-15 11:07 ` Bruce Richardson
2025-05-12 12:58 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Bruce Richardson
2025-05-14 16:32 ` Bruce Richardson
2025-05-15 11:15 ` Burakov, Anatoly
2025-05-15 12:58 ` Bruce Richardson
2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanp Anatoly Burakov
2025-05-30 13:56 ` [PATCH v4 01/25] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
2025-05-30 13:56 ` [PATCH v4 02/25] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
2025-05-30 13:56 ` [PATCH v4 03/25] net/ixgbe: match variable names to other drivers Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 04/25] net/i40e: match variable name " Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 05/25] net/ice: " Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 06/25] net/i40e: rename 16-byte descriptor define Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 07/25] net/ice: " Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 08/25] net/iavf: " Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 09/25] net/ixgbe: simplify vector PMD compilation Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 10/25] net/ixgbe: replace always-true check Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 11/25] net/ixgbe: clean up definitions Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 12/25] net/i40e: " Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 13/25] net/ice: " Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 14/25] net/iavf: " Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 15/25] net/ixgbe: create common Rx queue structure Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 16/25] net/i40e: use the " Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 17/25] net/ice: " Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 18/25] net/iavf: " Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 19/25] net/intel: generalize vectorized Rx rearm Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 20/25] net/i40e: use common Rx rearm code Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 21/25] net/iavf: " Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 22/25] net/ixgbe: " Anatoly Burakov
2025-05-30 13:57 ` Anatoly Burakov [this message]
2025-05-30 13:57 ` [PATCH v4 24/25] net/intel: add common Rx mbuf recycle Anatoly Burakov
2025-05-30 13:57 ` [PATCH v4 25/25] net/intel: add common Tx " Anatoly Burakov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=b7864d189e468484a6af5aa774ed566b2a37818f.1748612804.git.anatoly.burakov@intel.com \
--to=anatoly.burakov@intel.com \
--cc=bruce.richardson@intel.com \
--cc=dev@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).