* [dpdk-dev] [PATCH 1/2] net/i40e: fix generic build on FreeBSD
@ 2021-09-29 12:13 Leyi Rong
2021-09-29 12:13 ` [dpdk-dev] [PATCH 2/2] net/ice: " Leyi Rong
` (2 more replies)
0 siblings, 3 replies; 12+ messages in thread
From: Leyi Rong @ 2021-09-29 12:13 UTC (permalink / raw)
To: ferruh.yigit, bruce.richardson, qi.z.zhang
Cc: dev, Leyi Rong, wenzhuo.lu, stable
The common header file for vectorization is included in multiple files,
and so must use macros for the current compilation unit, rather than the
compiler-capability flag set for the whole driver. With the current,
incorrect, macro, the AVX512 or AVX2 flags may be set when compiling up
SSE code, leading to compilation errors. Changing from "CC_AVX*_SUPPORT"
to the compiler-defined "__AVX*__" macros fixes this issue. In addition,
splitting AVX-specific code into the new i40e_rxtx_common_avx.h header
file to avoid such bugs.
Bugzilla ID: 788
Fixes: 0604b1f2208f ("net/i40e: fix crash in AVX512")
Cc: wenzhuo.lu@intel.com
Cc: stable@dpdk.org
Signed-off-by: Leyi Rong <leyi.rong@intel.com>
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/i40e/i40e_rxtx_common_avx.h | 214 ++++++++++++++++++++++++
drivers/net/i40e/i40e_rxtx_vec_common.h | 200 +---------------------
2 files changed, 218 insertions(+), 196 deletions(-)
create mode 100644 drivers/net/i40e/i40e_rxtx_common_avx.h
diff --git a/drivers/net/i40e/i40e_rxtx_common_avx.h b/drivers/net/i40e/i40e_rxtx_common_avx.h
new file mode 100644
index 0000000000..cfc1e63173
--- /dev/null
+++ b/drivers/net/i40e/i40e_rxtx_common_avx.h
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2015 Intel Corporation
+ */
+
+#ifndef _I40E_RXTX_COMMON_AVX_H_
+#define _I40E_RXTX_COMMON_AVX_H_
+#include <stdint.h>
+#include <ethdev_driver.h>
+#include <rte_malloc.h>
+
+#include "i40e_ethdev.h"
+#include "i40e_rxtx.h"
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+#ifdef __AVX2__
+static __rte_always_inline void
+i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
+{
+ int i;
+ uint16_t rx_id;
+ volatile union i40e_rx_desc *rxdp;
+ struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+ rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+ /* Pull 'n' more MBUFs into the software ring */
+ if (rte_mempool_get_bulk(rxq->mp,
+ (void *)rxep,
+ RTE_I40E_RXQ_REARM_THRESH) < 0) {
+ if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+ rxq->nb_rx_desc) {
+ __m128i dma_addr0;
+ dma_addr0 = _mm_setzero_si128();
+ for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+ rxep[i].mbuf = &rxq->fake_mbuf;
+ _mm_store_si128((__m128i *)&rxdp[i].read,
+ dma_addr0);
+ }
+ }
+ rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+ RTE_I40E_RXQ_REARM_THRESH;
+ return;
+ }
+
+#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+ struct rte_mbuf *mb0, *mb1;
+ __m128i dma_addr0, dma_addr1;
+ __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+ RTE_PKTMBUF_HEADROOM);
+ /* Initialize the mbufs in vector, process 2 mbufs in one loop */
+ for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+ __m128i vaddr0, vaddr1;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+ dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+ /* add headroom to pa values */
+ dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+ dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+ /* flush desc with pa dma_addr */
+ _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+ _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+ }
+#else
+#ifdef __AVX512VL__
+ if (avx512) {
+ struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+ struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+ __m512i dma_addr0_3, dma_addr4_7;
+ __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+ /* Initialize the mbufs in vector, process 8 mbufs in one loop */
+ for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+ i += 8, rxep += 8, rxdp += 8) {
+ __m128i vaddr0, vaddr1, vaddr2, vaddr3;
+ __m128i vaddr4, vaddr5, vaddr6, vaddr7;
+ __m256i vaddr0_1, vaddr2_3;
+ __m256i vaddr4_5, vaddr6_7;
+ __m512i vaddr0_3, vaddr4_7;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+ mb2 = rxep[2].mbuf;
+ mb3 = rxep[3].mbuf;
+ mb4 = rxep[4].mbuf;
+ mb5 = rxep[5].mbuf;
+ mb6 = rxep[6].mbuf;
+ mb7 = rxep[7].mbuf;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+ vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+ vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+ vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+ vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+ vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+ vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+ /**
+ * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+ * into the high lanes. Similarly for 2 & 3, and so on.
+ */
+ vaddr0_1 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+ vaddr1, 1);
+ vaddr2_3 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+ vaddr3, 1);
+ vaddr4_5 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+ vaddr5, 1);
+ vaddr6_7 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+ vaddr7, 1);
+ vaddr0_3 =
+ _mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+ vaddr2_3, 1);
+ vaddr4_7 =
+ _mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+ vaddr6_7, 1);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+ dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+ /* add headroom to pa values */
+ dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+ dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+ /* flush desc with pa dma_addr */
+ _mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+ _mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+ }
+ } else
+#endif /* __AVX512VL__*/
+ {
+ struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+ __m256i dma_addr0_1, dma_addr2_3;
+ __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+ /* Initialize the mbufs in vector, process 4 mbufs in one loop */
+ for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+ i += 4, rxep += 4, rxdp += 4) {
+ __m128i vaddr0, vaddr1, vaddr2, vaddr3;
+ __m256i vaddr0_1, vaddr2_3;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+ mb2 = rxep[2].mbuf;
+ mb3 = rxep[3].mbuf;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+ vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+ vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+ /**
+ * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+ * into the high lanes. Similarly for 2 & 3
+ */
+ vaddr0_1 = _mm256_inserti128_si256
+ (_mm256_castsi128_si256(vaddr0), vaddr1, 1);
+ vaddr2_3 = _mm256_inserti128_si256
+ (_mm256_castsi128_si256(vaddr2), vaddr3, 1);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+ dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+ /* add headroom to pa values */
+ dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+ dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+ /* flush desc with pa dma_addr */
+ _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+ _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+ }
+ }
+
+#endif
+
+ rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+ if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+ rxq->rxrearm_start = 0;
+
+ rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+
+ rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+ (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+ /* Update the tail pointer on the NIC */
+ I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+}
+#endif /* __AVX2__*/
+
+#endif /*_I40E_RXTX_COMMON_AVX_H_*/
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index f52ed98d62..798d6a3e4a 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -15,6 +15,10 @@
#pragma GCC diagnostic ignored "-Wcast-qual"
#endif
+#ifdef __AVX2__
+#include "i40e_rxtx_common_avx.h"
+#endif
+
static inline uint16_t
reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs,
uint16_t nb_bufs, uint8_t *split_flags)
@@ -268,200 +272,4 @@ i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
#endif
}
-#ifdef CC_AVX2_SUPPORT
-static __rte_always_inline void
-i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
-{
- int i;
- uint16_t rx_id;
- volatile union i40e_rx_desc *rxdp;
- struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
- rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
- /* Pull 'n' more MBUFs into the software ring */
- if (rte_mempool_get_bulk(rxq->mp,
- (void *)rxep,
- RTE_I40E_RXQ_REARM_THRESH) < 0) {
- if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
- rxq->nb_rx_desc) {
- __m128i dma_addr0;
- dma_addr0 = _mm_setzero_si128();
- for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
- rxep[i].mbuf = &rxq->fake_mbuf;
- _mm_store_si128((__m128i *)&rxdp[i].read,
- dma_addr0);
- }
- }
- rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
- RTE_I40E_RXQ_REARM_THRESH;
- return;
- }
-
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
- struct rte_mbuf *mb0, *mb1;
- __m128i dma_addr0, dma_addr1;
- __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
- RTE_PKTMBUF_HEADROOM);
- /* Initialize the mbufs in vector, process 2 mbufs in one loop */
- for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
- __m128i vaddr0, vaddr1;
-
- mb0 = rxep[0].mbuf;
- mb1 = rxep[1].mbuf;
-
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
- /* convert pa to dma_addr hdr/data */
- dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
- dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
- /* add headroom to pa values */
- dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
- dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
- /* flush desc with pa dma_addr */
- _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
- _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
- }
-#else
-#ifdef CC_AVX512_SUPPORT
- if (avx512) {
- struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
- struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
- __m512i dma_addr0_3, dma_addr4_7;
- __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
- /* Initialize the mbufs in vector, process 8 mbufs in one loop */
- for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
- i += 8, rxep += 8, rxdp += 8) {
- __m128i vaddr0, vaddr1, vaddr2, vaddr3;
- __m128i vaddr4, vaddr5, vaddr6, vaddr7;
- __m256i vaddr0_1, vaddr2_3;
- __m256i vaddr4_5, vaddr6_7;
- __m512i vaddr0_3, vaddr4_7;
-
- mb0 = rxep[0].mbuf;
- mb1 = rxep[1].mbuf;
- mb2 = rxep[2].mbuf;
- mb3 = rxep[3].mbuf;
- mb4 = rxep[4].mbuf;
- mb5 = rxep[5].mbuf;
- mb6 = rxep[6].mbuf;
- mb7 = rxep[7].mbuf;
-
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
- vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
- vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
- vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
- vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
- vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
- vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
- /**
- * merge 0 & 1, by casting 0 to 256-bit and inserting 1
- * into the high lanes. Similarly for 2 & 3, and so on.
- */
- vaddr0_1 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
- vaddr1, 1);
- vaddr2_3 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
- vaddr3, 1);
- vaddr4_5 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
- vaddr5, 1);
- vaddr6_7 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
- vaddr7, 1);
- vaddr0_3 =
- _mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
- vaddr2_3, 1);
- vaddr4_7 =
- _mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
- vaddr6_7, 1);
-
- /* convert pa to dma_addr hdr/data */
- dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
- dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
- /* add headroom to pa values */
- dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
- dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
- /* flush desc with pa dma_addr */
- _mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
- _mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
- }
- } else
-#endif
- {
- struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
- __m256i dma_addr0_1, dma_addr2_3;
- __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
- /* Initialize the mbufs in vector, process 4 mbufs in one loop */
- for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
- i += 4, rxep += 4, rxdp += 4) {
- __m128i vaddr0, vaddr1, vaddr2, vaddr3;
- __m256i vaddr0_1, vaddr2_3;
-
- mb0 = rxep[0].mbuf;
- mb1 = rxep[1].mbuf;
- mb2 = rxep[2].mbuf;
- mb3 = rxep[3].mbuf;
-
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
- vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
- vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
- /*
- * merge 0 & 1, by casting 0 to 256-bit and inserting 1
- * into the high lanes. Similarly for 2 & 3
- */
- vaddr0_1 = _mm256_inserti128_si256(
- _mm256_castsi128_si256(vaddr0), vaddr1, 1);
- vaddr2_3 = _mm256_inserti128_si256(
- _mm256_castsi128_si256(vaddr2), vaddr3, 1);
-
- /* convert pa to dma_addr hdr/data */
- dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
- dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
- /* add headroom to pa values */
- dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
- dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
- /* flush desc with pa dma_addr */
- _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
- _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
- }
- }
-
-#endif
-
- rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
- if (rxq->rxrearm_start >= rxq->nb_rx_desc)
- rxq->rxrearm_start = 0;
-
- rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
- rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
- (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
- /* Update the tail pointer on the NIC */
- I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif
-
#endif
--
2.17.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [dpdk-dev] [PATCH 2/2] net/ice: fix generic build on FreeBSD
2021-09-29 12:13 [dpdk-dev] [PATCH 1/2] net/i40e: fix generic build on FreeBSD Leyi Rong
@ 2021-09-29 12:13 ` Leyi Rong
2021-10-18 21:35 ` [dpdk-dev] [PATCH 1/2] net/i40e: " Ferruh Yigit
2021-10-19 3:02 ` [dpdk-dev] [PATCH v2 0/2] fix generic build error " Leyi Rong
2 siblings, 0 replies; 12+ messages in thread
From: Leyi Rong @ 2021-09-29 12:13 UTC (permalink / raw)
To: ferruh.yigit, bruce.richardson, qi.z.zhang
Cc: dev, Leyi Rong, wenzhuo.lu, stable
The common header file for vectorization is included in multiple files,
and so must use macros for the current compilation unit, rather than the
compiler-capability flag set for the whole driver. With the current,
incorrect, macro, the AVX512 or AVX2 flags may be set when compiling up
SSE code, leading to compilation errors. Changing from "CC_AVX*_SUPPORT"
to the compiler-defined "__AVX*__" macros fixes this issue. In addition,
splitting AVX-specific code into the new ice_rxtx_common_avx.h header
file to avoid such bugs.
Bugzilla ID: 788
Fixes: a4e480de268e ("net/ice: optimize Tx by using AVX512")
Fixes: 20daa1c978b7 ("net/ice: fix crash in AVX512")
Cc: wenzhuo.lu@intel.com
Cc: leyi.rong@intel.com
Cc: stable@dpdk.org
Signed-off-by: Leyi Rong <leyi.rong@intel.com>
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/ice/ice_rxtx_common_avx.h | 213 ++++++++++++++++++++++++++
drivers/net/ice/ice_rxtx_vec_common.h | 205 +------------------------
2 files changed, 218 insertions(+), 200 deletions(-)
create mode 100644 drivers/net/ice/ice_rxtx_common_avx.h
diff --git a/drivers/net/ice/ice_rxtx_common_avx.h b/drivers/net/ice/ice_rxtx_common_avx.h
new file mode 100644
index 0000000000..81e0db5dd3
--- /dev/null
+++ b/drivers/net/ice/ice_rxtx_common_avx.h
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _ICE_RXTX_COMMON_AVX_H_
+#define _ICE_RXTX_COMMON_AVX_H_
+
+#include "ice_rxtx.h"
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+#ifdef __AVX2__
+static __rte_always_inline void
+ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
+{
+ int i;
+ uint16_t rx_id;
+ volatile union ice_rx_flex_desc *rxdp;
+ struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+ rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+ /* Pull 'n' more MBUFs into the software ring */
+ if (rte_mempool_get_bulk(rxq->mp,
+ (void *)rxep,
+ ICE_RXQ_REARM_THRESH) < 0) {
+ if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
+ rxq->nb_rx_desc) {
+ __m128i dma_addr0;
+
+ dma_addr0 = _mm_setzero_si128();
+ for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
+ rxep[i].mbuf = &rxq->fake_mbuf;
+ _mm_store_si128((__m128i *)&rxdp[i].read,
+ dma_addr0);
+ }
+ }
+ rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+ ICE_RXQ_REARM_THRESH;
+ return;
+ }
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+ struct rte_mbuf *mb0, *mb1;
+ __m128i dma_addr0, dma_addr1;
+ __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+ RTE_PKTMBUF_HEADROOM);
+ /* Initialize the mbufs in vector, process 2 mbufs in one loop */
+ for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+ __m128i vaddr0, vaddr1;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+ dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+ /* add headroom to pa values */
+ dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+ dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+ /* flush desc with pa dma_addr */
+ _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+ _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+ }
+#else
+#ifdef __AVX512VL__
+ if (avx512) {
+ struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+ struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+ __m512i dma_addr0_3, dma_addr4_7;
+ __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+ /* Initialize the mbufs in vector, process 8 mbufs in one loop */
+ for (i = 0; i < ICE_RXQ_REARM_THRESH;
+ i += 8, rxep += 8, rxdp += 8) {
+ __m128i vaddr0, vaddr1, vaddr2, vaddr3;
+ __m128i vaddr4, vaddr5, vaddr6, vaddr7;
+ __m256i vaddr0_1, vaddr2_3;
+ __m256i vaddr4_5, vaddr6_7;
+ __m512i vaddr0_3, vaddr4_7;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+ mb2 = rxep[2].mbuf;
+ mb3 = rxep[3].mbuf;
+ mb4 = rxep[4].mbuf;
+ mb5 = rxep[5].mbuf;
+ mb6 = rxep[6].mbuf;
+ mb7 = rxep[7].mbuf;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+ vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+ vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+ vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+ vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+ vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+ vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+ /**
+ * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+ * into the high lanes. Similarly for 2 & 3, and so on.
+ */
+ vaddr0_1 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+ vaddr1, 1);
+ vaddr2_3 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+ vaddr3, 1);
+ vaddr4_5 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+ vaddr5, 1);
+ vaddr6_7 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+ vaddr7, 1);
+ vaddr0_3 =
+ _mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+ vaddr2_3, 1);
+ vaddr4_7 =
+ _mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+ vaddr6_7, 1);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+ dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+ /* add headroom to pa values */
+ dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+ dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+ /* flush desc with pa dma_addr */
+ _mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+ _mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+ }
+ } else
+#endif /* __AVX512VL__ */
+ {
+ struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+ __m256i dma_addr0_1, dma_addr2_3;
+ __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+ /* Initialize the mbufs in vector, process 4 mbufs in one loop */
+ for (i = 0; i < ICE_RXQ_REARM_THRESH;
+ i += 4, rxep += 4, rxdp += 4) {
+ __m128i vaddr0, vaddr1, vaddr2, vaddr3;
+ __m256i vaddr0_1, vaddr2_3;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+ mb2 = rxep[2].mbuf;
+ mb3 = rxep[3].mbuf;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+ vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+ vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+ /**
+ * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+ * into the high lanes. Similarly for 2 & 3
+ */
+ vaddr0_1 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+ vaddr1, 1);
+ vaddr2_3 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+ vaddr3, 1);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+ dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+ /* add headroom to pa values */
+ dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+ dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+ /* flush desc with pa dma_addr */
+ _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+ _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+ }
+ }
+
+#endif
+
+ rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
+ if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+ rxq->rxrearm_start = 0;
+
+ rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
+
+ rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+ (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+ /* Update the tail pointer on the NIC */
+ ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+}
+#endif /* __AVX2__ */
+
+#endif /* _ICE_RXTX_COMMON_AVX_H_ */
diff --git a/drivers/net/ice/ice_rxtx_vec_common.h b/drivers/net/ice/ice_rxtx_vec_common.h
index 5b5250565e..94ba87cbd9 100644
--- a/drivers/net/ice/ice_rxtx_vec_common.h
+++ b/drivers/net/ice/ice_rxtx_vec_common.h
@@ -11,6 +11,10 @@
#pragma GCC diagnostic ignored "-Wcast-qual"
#endif
+#ifdef __AVX2__
+#include "ice_rxtx_common_avx.h"
+#endif
+
static inline uint16_t
ice_rx_reassemble_packets(struct ice_rx_queue *rxq, struct rte_mbuf **rx_bufs,
uint16_t nb_bufs, uint8_t *split_flags)
@@ -194,7 +198,7 @@ _ice_tx_queue_release_mbufs_vec(struct ice_tx_queue *txq)
*/
i = txq->tx_next_dd - txq->tx_rs_thresh + 1;
-#ifdef CC_AVX512_SUPPORT
+#ifdef __AVX512VL__
struct rte_eth_dev *dev = &rte_eth_devices[txq->vsi->adapter->pf.dev_data->port_id];
if (dev->tx_pkt_burst == ice_xmit_pkts_vec_avx512 ||
@@ -355,205 +359,6 @@ ice_tx_vec_dev_check_default(struct rte_eth_dev *dev)
return result;
}
-#ifdef CC_AVX2_SUPPORT
-static __rte_always_inline void
-ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
-{
- int i;
- uint16_t rx_id;
- volatile union ice_rx_flex_desc *rxdp;
- struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
- rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
- /* Pull 'n' more MBUFs into the software ring */
- if (rte_mempool_get_bulk(rxq->mp,
- (void *)rxep,
- ICE_RXQ_REARM_THRESH) < 0) {
- if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
- rxq->nb_rx_desc) {
- __m128i dma_addr0;
-
- dma_addr0 = _mm_setzero_si128();
- for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
- rxep[i].mbuf = &rxq->fake_mbuf;
- _mm_store_si128((__m128i *)&rxdp[i].read,
- dma_addr0);
- }
- }
- rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
- ICE_RXQ_REARM_THRESH;
- return;
- }
-
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
- struct rte_mbuf *mb0, *mb1;
- __m128i dma_addr0, dma_addr1;
- __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
- RTE_PKTMBUF_HEADROOM);
- /* Initialize the mbufs in vector, process 2 mbufs in one loop */
- for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
- __m128i vaddr0, vaddr1;
-
- mb0 = rxep[0].mbuf;
- mb1 = rxep[1].mbuf;
-
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
- /* convert pa to dma_addr hdr/data */
- dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
- dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
- /* add headroom to pa values */
- dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
- dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
- /* flush desc with pa dma_addr */
- _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
- _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
- }
-#else
-#ifdef CC_AVX512_SUPPORT
- if (avx512) {
- struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
- struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
- __m512i dma_addr0_3, dma_addr4_7;
- __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
- /* Initialize the mbufs in vector, process 8 mbufs in one loop */
- for (i = 0; i < ICE_RXQ_REARM_THRESH;
- i += 8, rxep += 8, rxdp += 8) {
- __m128i vaddr0, vaddr1, vaddr2, vaddr3;
- __m128i vaddr4, vaddr5, vaddr6, vaddr7;
- __m256i vaddr0_1, vaddr2_3;
- __m256i vaddr4_5, vaddr6_7;
- __m512i vaddr0_3, vaddr4_7;
-
- mb0 = rxep[0].mbuf;
- mb1 = rxep[1].mbuf;
- mb2 = rxep[2].mbuf;
- mb3 = rxep[3].mbuf;
- mb4 = rxep[4].mbuf;
- mb5 = rxep[5].mbuf;
- mb6 = rxep[6].mbuf;
- mb7 = rxep[7].mbuf;
-
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
- vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
- vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
- vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
- vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
- vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
- vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
- /**
- * merge 0 & 1, by casting 0 to 256-bit and inserting 1
- * into the high lanes. Similarly for 2 & 3, and so on.
- */
- vaddr0_1 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
- vaddr1, 1);
- vaddr2_3 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
- vaddr3, 1);
- vaddr4_5 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
- vaddr5, 1);
- vaddr6_7 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
- vaddr7, 1);
- vaddr0_3 =
- _mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
- vaddr2_3, 1);
- vaddr4_7 =
- _mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
- vaddr6_7, 1);
-
- /* convert pa to dma_addr hdr/data */
- dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
- dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
- /* add headroom to pa values */
- dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
- dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
- /* flush desc with pa dma_addr */
- _mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
- _mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
- }
- } else
-#endif
- {
- struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
- __m256i dma_addr0_1, dma_addr2_3;
- __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
- /* Initialize the mbufs in vector, process 4 mbufs in one loop */
- for (i = 0; i < ICE_RXQ_REARM_THRESH;
- i += 4, rxep += 4, rxdp += 4) {
- __m128i vaddr0, vaddr1, vaddr2, vaddr3;
- __m256i vaddr0_1, vaddr2_3;
-
- mb0 = rxep[0].mbuf;
- mb1 = rxep[1].mbuf;
- mb2 = rxep[2].mbuf;
- mb3 = rxep[3].mbuf;
-
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
- vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
- vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
- /**
- * merge 0 & 1, by casting 0 to 256-bit and inserting 1
- * into the high lanes. Similarly for 2 & 3
- */
- vaddr0_1 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
- vaddr1, 1);
- vaddr2_3 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
- vaddr3, 1);
-
- /* convert pa to dma_addr hdr/data */
- dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
- dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
- /* add headroom to pa values */
- dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
- dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
- /* flush desc with pa dma_addr */
- _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
- _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
- }
- }
-
-#endif
-
- rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
- if (rxq->rxrearm_start >= rxq->nb_rx_desc)
- rxq->rxrearm_start = 0;
-
- rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
-
- rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
- (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
- /* Update the tail pointer on the NIC */
- ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif
-
static inline void
ice_txd_enable_offload(struct rte_mbuf *tx_pkt,
uint64_t *txd_hi)
--
2.17.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] net/i40e: fix generic build on FreeBSD
2021-09-29 12:13 [dpdk-dev] [PATCH 1/2] net/i40e: fix generic build on FreeBSD Leyi Rong
2021-09-29 12:13 ` [dpdk-dev] [PATCH 2/2] net/ice: " Leyi Rong
@ 2021-10-18 21:35 ` Ferruh Yigit
2021-10-19 3:02 ` [dpdk-dev] [PATCH v2 0/2] fix generic build error " Leyi Rong
2 siblings, 0 replies; 12+ messages in thread
From: Ferruh Yigit @ 2021-10-18 21:35 UTC (permalink / raw)
To: Leyi Rong, bruce.richardson, qi.z.zhang; +Cc: dev, wenzhuo.lu, stable
On 9/29/2021 1:13 PM, Leyi Rong wrote:
> The common header file for vectorization is included in multiple files,
> and so must use macros for the current compilation unit, rather than the
> compiler-capability flag set for the whole driver. With the current,
> incorrect, macro, the AVX512 or AVX2 flags may be set when compiling up
> SSE code, leading to compilation errors. Changing from "CC_AVX*_SUPPORT"
> to the compiler-defined "__AVX*__" macros fixes this issue. In addition,
> splitting AVX-specific code into the new i40e_rxtx_common_avx.h header
> file to avoid such bugs.
>
> Bugzilla ID: 788
> Fixes: 0604b1f2208f ("net/i40e: fix crash in AVX512")
> Cc: wenzhuo.lu@intel.com
> Cc: stable@dpdk.org
>
> Signed-off-by: Leyi Rong <leyi.rong@intel.com>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
<...>
> diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
> index f52ed98d62..798d6a3e4a 100644
> --- a/drivers/net/i40e/i40e_rxtx_vec_common.h
> +++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
> @@ -15,6 +15,10 @@
> #pragma GCC diagnostic ignored "-Wcast-qual"
> #endif
>
> +#ifdef __AVX2__
> +#include "i40e_rxtx_common_avx.h"
> +#endif
> +
The 'i40e_rxtx_common_avx.h' header is not required by 'i40e_rxtx_vec_common.h',
why not include 'i40e_rxtx_common_avx.h' directly from related .c files:
i40e_rxtx_vec_avx2.c
i40e_rxtx_vec_avx512.c
So "#ifdef __AVX2__" won't be needed. Same for ice.
^ permalink raw reply [flat|nested] 12+ messages in thread
* [dpdk-dev] [PATCH v2 0/2] fix generic build error on FreeBSD
2021-09-29 12:13 [dpdk-dev] [PATCH 1/2] net/i40e: fix generic build on FreeBSD Leyi Rong
2021-09-29 12:13 ` [dpdk-dev] [PATCH 2/2] net/ice: " Leyi Rong
2021-10-18 21:35 ` [dpdk-dev] [PATCH 1/2] net/i40e: " Ferruh Yigit
@ 2021-10-19 3:02 ` Leyi Rong
2021-10-19 3:02 ` [dpdk-dev] [PATCH v2 1/2] net/i40e: fix generic build " Leyi Rong
` (2 more replies)
2 siblings, 3 replies; 12+ messages in thread
From: Leyi Rong @ 2021-10-19 3:02 UTC (permalink / raw)
To: ferruh.yigit, bruce.richardson, qi.z.zhang
Cc: wenzhuo.lu, dev, stable, Leyi Rong
This patchset fix FreeBSD build error reported by
https://bugs.dpdk.org/show_bug.cgi?id=788.
Also splitting AVX-specific code into new xxx_common_avx.h header file.
---
v2:
- Decouple i40e_rxtx_common_avx.h/ice_rxtx_common_avx.h from
i40e_rxtx_vec_common.h/ice_rxtx_vec_common.h
Leyi Rong (2):
net/i40e: fix generic build on FreeBSD
net/ice: fix generic build on FreeBSD
drivers/net/i40e/i40e_rxtx_common_avx.h | 214 ++++++++++++++++++++++++
drivers/net/i40e/i40e_rxtx_vec_avx2.c | 1 +
drivers/net/i40e/i40e_rxtx_vec_avx512.c | 1 +
drivers/net/i40e/i40e_rxtx_vec_common.h | 196 ----------------------
drivers/net/ice/ice_rxtx_common_avx.h | 213 +++++++++++++++++++++++
drivers/net/ice/ice_rxtx_vec_avx2.c | 1 +
drivers/net/ice/ice_rxtx_vec_avx512.c | 1 +
drivers/net/ice/ice_rxtx_vec_common.h | 201 +---------------------
8 files changed, 432 insertions(+), 396 deletions(-)
create mode 100644 drivers/net/i40e/i40e_rxtx_common_avx.h
create mode 100644 drivers/net/ice/ice_rxtx_common_avx.h
--
2.17.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [dpdk-dev] [PATCH v2 1/2] net/i40e: fix generic build on FreeBSD
2021-10-19 3:02 ` [dpdk-dev] [PATCH v2 0/2] fix generic build error " Leyi Rong
@ 2021-10-19 3:02 ` Leyi Rong
2021-10-19 3:02 ` [dpdk-dev] [PATCH v2 2/2] net/ice: " Leyi Rong
2021-10-19 9:18 ` [dpdk-dev] [PATCH v2 0/2] fix generic build error " Ferruh Yigit
2 siblings, 0 replies; 12+ messages in thread
From: Leyi Rong @ 2021-10-19 3:02 UTC (permalink / raw)
To: ferruh.yigit, bruce.richardson, qi.z.zhang
Cc: wenzhuo.lu, dev, stable, Leyi Rong
The common header file for vectorization is included in multiple files,
and so must use macros for the current compilation unit, rather than the
compiler-capability flag set for the whole driver. With the current,
incorrect, macro, the AVX512 or AVX2 flags may be set when compiling up
SSE code, leading to compilation errors. Changing from "CC_AVX*_SUPPORT"
to the compiler-defined "__AVX*__" macros fixes this issue. In addition,
splitting AVX-specific code into the new i40e_rxtx_common_avx.h header
file to avoid such bugs.
Bugzilla ID: 788
Fixes: 0604b1f2208f ("net/i40e: fix crash in AVX512")
Cc: wenzhuo.lu@intel.com
Cc: stable@dpdk.org
Signed-off-by: Leyi Rong <leyi.rong@intel.com>
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/i40e/i40e_rxtx_common_avx.h | 214 ++++++++++++++++++++++++
drivers/net/i40e/i40e_rxtx_vec_avx2.c | 1 +
drivers/net/i40e/i40e_rxtx_vec_avx512.c | 1 +
drivers/net/i40e/i40e_rxtx_vec_common.h | 196 ----------------------
4 files changed, 216 insertions(+), 196 deletions(-)
create mode 100644 drivers/net/i40e/i40e_rxtx_common_avx.h
diff --git a/drivers/net/i40e/i40e_rxtx_common_avx.h b/drivers/net/i40e/i40e_rxtx_common_avx.h
new file mode 100644
index 0000000000..cfc1e63173
--- /dev/null
+++ b/drivers/net/i40e/i40e_rxtx_common_avx.h
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2015 Intel Corporation
+ */
+
+#ifndef _I40E_RXTX_COMMON_AVX_H_
+#define _I40E_RXTX_COMMON_AVX_H_
+#include <stdint.h>
+#include <ethdev_driver.h>
+#include <rte_malloc.h>
+
+#include "i40e_ethdev.h"
+#include "i40e_rxtx.h"
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+#ifdef __AVX2__
+static __rte_always_inline void
+i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
+{
+ int i;
+ uint16_t rx_id;
+ volatile union i40e_rx_desc *rxdp;
+ struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+ rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+ /* Pull 'n' more MBUFs into the software ring */
+ if (rte_mempool_get_bulk(rxq->mp,
+ (void *)rxep,
+ RTE_I40E_RXQ_REARM_THRESH) < 0) {
+ if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+ rxq->nb_rx_desc) {
+ __m128i dma_addr0;
+ dma_addr0 = _mm_setzero_si128();
+ for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+ rxep[i].mbuf = &rxq->fake_mbuf;
+ _mm_store_si128((__m128i *)&rxdp[i].read,
+ dma_addr0);
+ }
+ }
+ rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+ RTE_I40E_RXQ_REARM_THRESH;
+ return;
+ }
+
+#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+ struct rte_mbuf *mb0, *mb1;
+ __m128i dma_addr0, dma_addr1;
+ __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+ RTE_PKTMBUF_HEADROOM);
+ /* Initialize the mbufs in vector, process 2 mbufs in one loop */
+ for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+ __m128i vaddr0, vaddr1;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+ dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+ /* add headroom to pa values */
+ dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+ dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+ /* flush desc with pa dma_addr */
+ _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+ _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+ }
+#else
+#ifdef __AVX512VL__
+ if (avx512) {
+ struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+ struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+ __m512i dma_addr0_3, dma_addr4_7;
+ __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+ /* Initialize the mbufs in vector, process 8 mbufs in one loop */
+ for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+ i += 8, rxep += 8, rxdp += 8) {
+ __m128i vaddr0, vaddr1, vaddr2, vaddr3;
+ __m128i vaddr4, vaddr5, vaddr6, vaddr7;
+ __m256i vaddr0_1, vaddr2_3;
+ __m256i vaddr4_5, vaddr6_7;
+ __m512i vaddr0_3, vaddr4_7;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+ mb2 = rxep[2].mbuf;
+ mb3 = rxep[3].mbuf;
+ mb4 = rxep[4].mbuf;
+ mb5 = rxep[5].mbuf;
+ mb6 = rxep[6].mbuf;
+ mb7 = rxep[7].mbuf;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+ vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+ vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+ vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+ vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+ vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+ vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+ /**
+ * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+ * into the high lanes. Similarly for 2 & 3, and so on.
+ */
+ vaddr0_1 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+ vaddr1, 1);
+ vaddr2_3 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+ vaddr3, 1);
+ vaddr4_5 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+ vaddr5, 1);
+ vaddr6_7 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+ vaddr7, 1);
+ vaddr0_3 =
+ _mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+ vaddr2_3, 1);
+ vaddr4_7 =
+ _mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+ vaddr6_7, 1);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+ dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+ /* add headroom to pa values */
+ dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+ dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+ /* flush desc with pa dma_addr */
+ _mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+ _mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+ }
+ } else
+#endif /* __AVX512VL__*/
+ {
+ struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+ __m256i dma_addr0_1, dma_addr2_3;
+ __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+ /* Initialize the mbufs in vector, process 4 mbufs in one loop */
+ for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+ i += 4, rxep += 4, rxdp += 4) {
+ __m128i vaddr0, vaddr1, vaddr2, vaddr3;
+ __m256i vaddr0_1, vaddr2_3;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+ mb2 = rxep[2].mbuf;
+ mb3 = rxep[3].mbuf;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+ vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+ vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+ /**
+ * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+ * into the high lanes. Similarly for 2 & 3
+ */
+ vaddr0_1 = _mm256_inserti128_si256
+ (_mm256_castsi128_si256(vaddr0), vaddr1, 1);
+ vaddr2_3 = _mm256_inserti128_si256
+ (_mm256_castsi128_si256(vaddr2), vaddr3, 1);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+ dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+ /* add headroom to pa values */
+ dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+ dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+ /* flush desc with pa dma_addr */
+ _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+ _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+ }
+ }
+
+#endif
+
+ rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+ if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+ rxq->rxrearm_start = 0;
+
+ rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+
+ rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+ (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+ /* Update the tail pointer on the NIC */
+ I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+}
+#endif /* __AVX2__*/
+
+#endif /*_I40E_RXTX_COMMON_AVX_H_*/
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
index 3b9eef91a9..5f7b50d67c 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
@@ -11,6 +11,7 @@
#include "i40e_ethdev.h"
#include "i40e_rxtx.h"
#include "i40e_rxtx_vec_common.h"
+#include "i40e_rxtx_common_avx.h"
#include <rte_vect.h>
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index bd21d64223..4ffe030fcb 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -11,6 +11,7 @@
#include "i40e_ethdev.h"
#include "i40e_rxtx.h"
#include "i40e_rxtx_vec_common.h"
+#include "i40e_rxtx_common_avx.h"
#include <rte_vect.h>
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index f52ed98d62..f52e3c5675 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -268,200 +268,4 @@ i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
#endif
}
-#ifdef CC_AVX2_SUPPORT
-static __rte_always_inline void
-i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
-{
- int i;
- uint16_t rx_id;
- volatile union i40e_rx_desc *rxdp;
- struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
- rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
- /* Pull 'n' more MBUFs into the software ring */
- if (rte_mempool_get_bulk(rxq->mp,
- (void *)rxep,
- RTE_I40E_RXQ_REARM_THRESH) < 0) {
- if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
- rxq->nb_rx_desc) {
- __m128i dma_addr0;
- dma_addr0 = _mm_setzero_si128();
- for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
- rxep[i].mbuf = &rxq->fake_mbuf;
- _mm_store_si128((__m128i *)&rxdp[i].read,
- dma_addr0);
- }
- }
- rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
- RTE_I40E_RXQ_REARM_THRESH;
- return;
- }
-
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
- struct rte_mbuf *mb0, *mb1;
- __m128i dma_addr0, dma_addr1;
- __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
- RTE_PKTMBUF_HEADROOM);
- /* Initialize the mbufs in vector, process 2 mbufs in one loop */
- for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
- __m128i vaddr0, vaddr1;
-
- mb0 = rxep[0].mbuf;
- mb1 = rxep[1].mbuf;
-
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
- /* convert pa to dma_addr hdr/data */
- dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
- dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
- /* add headroom to pa values */
- dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
- dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
- /* flush desc with pa dma_addr */
- _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
- _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
- }
-#else
-#ifdef CC_AVX512_SUPPORT
- if (avx512) {
- struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
- struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
- __m512i dma_addr0_3, dma_addr4_7;
- __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
- /* Initialize the mbufs in vector, process 8 mbufs in one loop */
- for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
- i += 8, rxep += 8, rxdp += 8) {
- __m128i vaddr0, vaddr1, vaddr2, vaddr3;
- __m128i vaddr4, vaddr5, vaddr6, vaddr7;
- __m256i vaddr0_1, vaddr2_3;
- __m256i vaddr4_5, vaddr6_7;
- __m512i vaddr0_3, vaddr4_7;
-
- mb0 = rxep[0].mbuf;
- mb1 = rxep[1].mbuf;
- mb2 = rxep[2].mbuf;
- mb3 = rxep[3].mbuf;
- mb4 = rxep[4].mbuf;
- mb5 = rxep[5].mbuf;
- mb6 = rxep[6].mbuf;
- mb7 = rxep[7].mbuf;
-
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
- vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
- vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
- vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
- vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
- vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
- vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
- /**
- * merge 0 & 1, by casting 0 to 256-bit and inserting 1
- * into the high lanes. Similarly for 2 & 3, and so on.
- */
- vaddr0_1 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
- vaddr1, 1);
- vaddr2_3 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
- vaddr3, 1);
- vaddr4_5 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
- vaddr5, 1);
- vaddr6_7 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
- vaddr7, 1);
- vaddr0_3 =
- _mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
- vaddr2_3, 1);
- vaddr4_7 =
- _mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
- vaddr6_7, 1);
-
- /* convert pa to dma_addr hdr/data */
- dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
- dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
- /* add headroom to pa values */
- dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
- dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
- /* flush desc with pa dma_addr */
- _mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
- _mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
- }
- } else
-#endif
- {
- struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
- __m256i dma_addr0_1, dma_addr2_3;
- __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
- /* Initialize the mbufs in vector, process 4 mbufs in one loop */
- for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
- i += 4, rxep += 4, rxdp += 4) {
- __m128i vaddr0, vaddr1, vaddr2, vaddr3;
- __m256i vaddr0_1, vaddr2_3;
-
- mb0 = rxep[0].mbuf;
- mb1 = rxep[1].mbuf;
- mb2 = rxep[2].mbuf;
- mb3 = rxep[3].mbuf;
-
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
- vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
- vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
- /*
- * merge 0 & 1, by casting 0 to 256-bit and inserting 1
- * into the high lanes. Similarly for 2 & 3
- */
- vaddr0_1 = _mm256_inserti128_si256(
- _mm256_castsi128_si256(vaddr0), vaddr1, 1);
- vaddr2_3 = _mm256_inserti128_si256(
- _mm256_castsi128_si256(vaddr2), vaddr3, 1);
-
- /* convert pa to dma_addr hdr/data */
- dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
- dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
- /* add headroom to pa values */
- dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
- dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
- /* flush desc with pa dma_addr */
- _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
- _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
- }
- }
-
-#endif
-
- rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
- if (rxq->rxrearm_start >= rxq->nb_rx_desc)
- rxq->rxrearm_start = 0;
-
- rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
- rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
- (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
- /* Update the tail pointer on the NIC */
- I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif
-
#endif
--
2.17.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [dpdk-dev] [PATCH v2 2/2] net/ice: fix generic build on FreeBSD
2021-10-19 3:02 ` [dpdk-dev] [PATCH v2 0/2] fix generic build error " Leyi Rong
2021-10-19 3:02 ` [dpdk-dev] [PATCH v2 1/2] net/i40e: fix generic build " Leyi Rong
@ 2021-10-19 3:02 ` Leyi Rong
2021-10-19 9:18 ` [dpdk-dev] [PATCH v2 0/2] fix generic build error " Ferruh Yigit
2 siblings, 0 replies; 12+ messages in thread
From: Leyi Rong @ 2021-10-19 3:02 UTC (permalink / raw)
To: ferruh.yigit, bruce.richardson, qi.z.zhang
Cc: wenzhuo.lu, dev, stable, Leyi Rong
The common header file for vectorization is included in multiple files,
and so must use macros for the current compilation unit, rather than the
compiler-capability flag set for the whole driver. With the current,
incorrect, macro, the AVX512 or AVX2 flags may be set when compiling up
SSE code, leading to compilation errors. Changing from "CC_AVX*_SUPPORT"
to the compiler-defined "__AVX*__" macros fixes this issue. In addition,
splitting AVX-specific code into the new ice_rxtx_common_avx.h header
file to avoid such bugs.
Bugzilla ID: 788
Fixes: a4e480de268e ("net/ice: optimize Tx by using AVX512")
Fixes: 20daa1c978b7 ("net/ice: fix crash in AVX512")
Cc: wenzhuo.lu@intel.com
Cc: leyi.rong@intel.com
Cc: stable@dpdk.org
Signed-off-by: Leyi Rong <leyi.rong@intel.com>
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/ice/ice_rxtx_common_avx.h | 213 ++++++++++++++++++++++++++
drivers/net/ice/ice_rxtx_vec_avx2.c | 1 +
drivers/net/ice/ice_rxtx_vec_avx512.c | 1 +
drivers/net/ice/ice_rxtx_vec_common.h | 201 +-----------------------
4 files changed, 216 insertions(+), 200 deletions(-)
create mode 100644 drivers/net/ice/ice_rxtx_common_avx.h
diff --git a/drivers/net/ice/ice_rxtx_common_avx.h b/drivers/net/ice/ice_rxtx_common_avx.h
new file mode 100644
index 0000000000..81e0db5dd3
--- /dev/null
+++ b/drivers/net/ice/ice_rxtx_common_avx.h
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _ICE_RXTX_COMMON_AVX_H_
+#define _ICE_RXTX_COMMON_AVX_H_
+
+#include "ice_rxtx.h"
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+#ifdef __AVX2__
+static __rte_always_inline void
+ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
+{
+ int i;
+ uint16_t rx_id;
+ volatile union ice_rx_flex_desc *rxdp;
+ struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+ rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+ /* Pull 'n' more MBUFs into the software ring */
+ if (rte_mempool_get_bulk(rxq->mp,
+ (void *)rxep,
+ ICE_RXQ_REARM_THRESH) < 0) {
+ if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
+ rxq->nb_rx_desc) {
+ __m128i dma_addr0;
+
+ dma_addr0 = _mm_setzero_si128();
+ for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
+ rxep[i].mbuf = &rxq->fake_mbuf;
+ _mm_store_si128((__m128i *)&rxdp[i].read,
+ dma_addr0);
+ }
+ }
+ rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+ ICE_RXQ_REARM_THRESH;
+ return;
+ }
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+ struct rte_mbuf *mb0, *mb1;
+ __m128i dma_addr0, dma_addr1;
+ __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+ RTE_PKTMBUF_HEADROOM);
+ /* Initialize the mbufs in vector, process 2 mbufs in one loop */
+ for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+ __m128i vaddr0, vaddr1;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+ dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+ /* add headroom to pa values */
+ dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+ dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+ /* flush desc with pa dma_addr */
+ _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+ _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+ }
+#else
+#ifdef __AVX512VL__
+ if (avx512) {
+ struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+ struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+ __m512i dma_addr0_3, dma_addr4_7;
+ __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+ /* Initialize the mbufs in vector, process 8 mbufs in one loop */
+ for (i = 0; i < ICE_RXQ_REARM_THRESH;
+ i += 8, rxep += 8, rxdp += 8) {
+ __m128i vaddr0, vaddr1, vaddr2, vaddr3;
+ __m128i vaddr4, vaddr5, vaddr6, vaddr7;
+ __m256i vaddr0_1, vaddr2_3;
+ __m256i vaddr4_5, vaddr6_7;
+ __m512i vaddr0_3, vaddr4_7;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+ mb2 = rxep[2].mbuf;
+ mb3 = rxep[3].mbuf;
+ mb4 = rxep[4].mbuf;
+ mb5 = rxep[5].mbuf;
+ mb6 = rxep[6].mbuf;
+ mb7 = rxep[7].mbuf;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+ vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+ vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+ vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+ vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+ vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+ vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+ /**
+ * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+ * into the high lanes. Similarly for 2 & 3, and so on.
+ */
+ vaddr0_1 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+ vaddr1, 1);
+ vaddr2_3 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+ vaddr3, 1);
+ vaddr4_5 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+ vaddr5, 1);
+ vaddr6_7 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+ vaddr7, 1);
+ vaddr0_3 =
+ _mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+ vaddr2_3, 1);
+ vaddr4_7 =
+ _mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+ vaddr6_7, 1);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+ dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+ /* add headroom to pa values */
+ dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+ dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+ /* flush desc with pa dma_addr */
+ _mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+ _mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+ }
+ } else
+#endif /* __AVX512VL__ */
+ {
+ struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+ __m256i dma_addr0_1, dma_addr2_3;
+ __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+ /* Initialize the mbufs in vector, process 4 mbufs in one loop */
+ for (i = 0; i < ICE_RXQ_REARM_THRESH;
+ i += 4, rxep += 4, rxdp += 4) {
+ __m128i vaddr0, vaddr1, vaddr2, vaddr3;
+ __m256i vaddr0_1, vaddr2_3;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+ mb2 = rxep[2].mbuf;
+ mb3 = rxep[3].mbuf;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+ vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+ vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+ /**
+ * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+ * into the high lanes. Similarly for 2 & 3
+ */
+ vaddr0_1 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+ vaddr1, 1);
+ vaddr2_3 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+ vaddr3, 1);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+ dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+ /* add headroom to pa values */
+ dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+ dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+ /* flush desc with pa dma_addr */
+ _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+ _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+ }
+ }
+
+#endif
+
+ rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
+ if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+ rxq->rxrearm_start = 0;
+
+ rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
+
+ rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+ (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+ /* Update the tail pointer on the NIC */
+ ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+}
+#endif /* __AVX2__ */
+
+#endif /* _ICE_RXTX_COMMON_AVX_H_ */
diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
index 9725ac0180..490693bff2 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -3,6 +3,7 @@
*/
#include "ice_rxtx_vec_common.h"
+#include "ice_rxtx_common_avx.h"
#include <rte_vect.h>
diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c
index 5bba9887d2..7efe7b50a2 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
@@ -3,6 +3,7 @@
*/
#include "ice_rxtx_vec_common.h"
+#include "ice_rxtx_common_avx.h"
#include <rte_vect.h>
diff --git a/drivers/net/ice/ice_rxtx_vec_common.h b/drivers/net/ice/ice_rxtx_vec_common.h
index 5b5250565e..f0f9926585 100644
--- a/drivers/net/ice/ice_rxtx_vec_common.h
+++ b/drivers/net/ice/ice_rxtx_vec_common.h
@@ -194,7 +194,7 @@ _ice_tx_queue_release_mbufs_vec(struct ice_tx_queue *txq)
*/
i = txq->tx_next_dd - txq->tx_rs_thresh + 1;
-#ifdef CC_AVX512_SUPPORT
+#ifdef __AVX512VL__
struct rte_eth_dev *dev = &rte_eth_devices[txq->vsi->adapter->pf.dev_data->port_id];
if (dev->tx_pkt_burst == ice_xmit_pkts_vec_avx512 ||
@@ -355,205 +355,6 @@ ice_tx_vec_dev_check_default(struct rte_eth_dev *dev)
return result;
}
-#ifdef CC_AVX2_SUPPORT
-static __rte_always_inline void
-ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
-{
- int i;
- uint16_t rx_id;
- volatile union ice_rx_flex_desc *rxdp;
- struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
- rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
- /* Pull 'n' more MBUFs into the software ring */
- if (rte_mempool_get_bulk(rxq->mp,
- (void *)rxep,
- ICE_RXQ_REARM_THRESH) < 0) {
- if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
- rxq->nb_rx_desc) {
- __m128i dma_addr0;
-
- dma_addr0 = _mm_setzero_si128();
- for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
- rxep[i].mbuf = &rxq->fake_mbuf;
- _mm_store_si128((__m128i *)&rxdp[i].read,
- dma_addr0);
- }
- }
- rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
- ICE_RXQ_REARM_THRESH;
- return;
- }
-
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
- struct rte_mbuf *mb0, *mb1;
- __m128i dma_addr0, dma_addr1;
- __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
- RTE_PKTMBUF_HEADROOM);
- /* Initialize the mbufs in vector, process 2 mbufs in one loop */
- for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
- __m128i vaddr0, vaddr1;
-
- mb0 = rxep[0].mbuf;
- mb1 = rxep[1].mbuf;
-
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
- /* convert pa to dma_addr hdr/data */
- dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
- dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
- /* add headroom to pa values */
- dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
- dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
- /* flush desc with pa dma_addr */
- _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
- _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
- }
-#else
-#ifdef CC_AVX512_SUPPORT
- if (avx512) {
- struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
- struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
- __m512i dma_addr0_3, dma_addr4_7;
- __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
- /* Initialize the mbufs in vector, process 8 mbufs in one loop */
- for (i = 0; i < ICE_RXQ_REARM_THRESH;
- i += 8, rxep += 8, rxdp += 8) {
- __m128i vaddr0, vaddr1, vaddr2, vaddr3;
- __m128i vaddr4, vaddr5, vaddr6, vaddr7;
- __m256i vaddr0_1, vaddr2_3;
- __m256i vaddr4_5, vaddr6_7;
- __m512i vaddr0_3, vaddr4_7;
-
- mb0 = rxep[0].mbuf;
- mb1 = rxep[1].mbuf;
- mb2 = rxep[2].mbuf;
- mb3 = rxep[3].mbuf;
- mb4 = rxep[4].mbuf;
- mb5 = rxep[5].mbuf;
- mb6 = rxep[6].mbuf;
- mb7 = rxep[7].mbuf;
-
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
- vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
- vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
- vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
- vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
- vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
- vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
- /**
- * merge 0 & 1, by casting 0 to 256-bit and inserting 1
- * into the high lanes. Similarly for 2 & 3, and so on.
- */
- vaddr0_1 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
- vaddr1, 1);
- vaddr2_3 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
- vaddr3, 1);
- vaddr4_5 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
- vaddr5, 1);
- vaddr6_7 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
- vaddr7, 1);
- vaddr0_3 =
- _mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
- vaddr2_3, 1);
- vaddr4_7 =
- _mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
- vaddr6_7, 1);
-
- /* convert pa to dma_addr hdr/data */
- dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
- dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
- /* add headroom to pa values */
- dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
- dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
- /* flush desc with pa dma_addr */
- _mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
- _mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
- }
- } else
-#endif
- {
- struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
- __m256i dma_addr0_1, dma_addr2_3;
- __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
- /* Initialize the mbufs in vector, process 4 mbufs in one loop */
- for (i = 0; i < ICE_RXQ_REARM_THRESH;
- i += 4, rxep += 4, rxdp += 4) {
- __m128i vaddr0, vaddr1, vaddr2, vaddr3;
- __m256i vaddr0_1, vaddr2_3;
-
- mb0 = rxep[0].mbuf;
- mb1 = rxep[1].mbuf;
- mb2 = rxep[2].mbuf;
- mb3 = rxep[3].mbuf;
-
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
- vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
- vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
- /**
- * merge 0 & 1, by casting 0 to 256-bit and inserting 1
- * into the high lanes. Similarly for 2 & 3
- */
- vaddr0_1 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
- vaddr1, 1);
- vaddr2_3 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
- vaddr3, 1);
-
- /* convert pa to dma_addr hdr/data */
- dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
- dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
- /* add headroom to pa values */
- dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
- dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
- /* flush desc with pa dma_addr */
- _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
- _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
- }
- }
-
-#endif
-
- rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
- if (rxq->rxrearm_start >= rxq->nb_rx_desc)
- rxq->rxrearm_start = 0;
-
- rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
-
- rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
- (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
- /* Update the tail pointer on the NIC */
- ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif
-
static inline void
ice_txd_enable_offload(struct rte_mbuf *tx_pkt,
uint64_t *txd_hi)
--
2.17.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [dpdk-dev] [PATCH v2 0/2] fix generic build error on FreeBSD
2021-10-19 3:02 ` [dpdk-dev] [PATCH v2 0/2] fix generic build error " Leyi Rong
2021-10-19 3:02 ` [dpdk-dev] [PATCH v2 1/2] net/i40e: fix generic build " Leyi Rong
2021-10-19 3:02 ` [dpdk-dev] [PATCH v2 2/2] net/ice: " Leyi Rong
@ 2021-10-19 9:18 ` Ferruh Yigit
2021-10-19 11:01 ` Zhang, Qi Z
2 siblings, 1 reply; 12+ messages in thread
From: Ferruh Yigit @ 2021-10-19 9:18 UTC (permalink / raw)
To: Leyi Rong, bruce.richardson, qi.z.zhang; +Cc: wenzhuo.lu, dev, stable
On 10/19/2021 4:02 AM, Leyi Rong wrote:
> This patchset fix FreeBSD build error reported by
> https://bugs.dpdk.org/show_bug.cgi?id=788.
> Also splitting AVX-specific code into new xxx_common_avx.h header file.
>
> ---
> v2:
> - Decouple i40e_rxtx_common_avx.h/ice_rxtx_common_avx.h from
> i40e_rxtx_vec_common.h/ice_rxtx_vec_common.h
>
>
> Leyi Rong (2):
> net/i40e: fix generic build on FreeBSD
> net/ice: fix generic build on FreeBSD
>
Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [dpdk-dev] [PATCH v2 0/2] fix generic build error on FreeBSD
2021-10-19 9:18 ` [dpdk-dev] [PATCH v2 0/2] fix generic build error " Ferruh Yigit
@ 2021-10-19 11:01 ` Zhang, Qi Z
0 siblings, 0 replies; 12+ messages in thread
From: Zhang, Qi Z @ 2021-10-19 11:01 UTC (permalink / raw)
To: Yigit, Ferruh, Rong, Leyi, Richardson, Bruce; +Cc: Lu, Wenzhuo, dev, stable
> -----Original Message-----
> From: Yigit, Ferruh <ferruh.yigit@intel.com>
> Sent: Tuesday, October 19, 2021 5:18 PM
> To: Rong, Leyi <leyi.rong@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; dev@dpdk.org; stable@dpdk.org
> Subject: Re: [PATCH v2 0/2] fix generic build error on FreeBSD
>
> On 10/19/2021 4:02 AM, Leyi Rong wrote:
> > This patchset fix FreeBSD build error reported by
> > https://bugs.dpdk.org/show_bug.cgi?id=788.
> > Also splitting AVX-specific code into new xxx_common_avx.h header file.
> >
> > ---
> > v2:
> > - Decouple i40e_rxtx_common_avx.h/ice_rxtx_common_avx.h from
> > i40e_rxtx_vec_common.h/ice_rxtx_vec_common.h
> >
> >
> > Leyi Rong (2):
> > net/i40e: fix generic build on FreeBSD
> > net/ice: fix generic build on FreeBSD
> >
>
> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
Applied to dpdk-next-net-intel
Thanks
Qi
^ permalink raw reply [flat|nested] 12+ messages in thread
* [dpdk-dev] [Bug 788] i40e: 16BYTE_RX_DESC build broken on FreeBSD-13
@ 2021-08-10 18:27 bugzilla
2021-08-18 16:38 ` [dpdk-dev] [PATCH 1/2] net/i40e: fix generic build on FreeBSD Bruce Richardson
0 siblings, 1 reply; 12+ messages in thread
From: bugzilla @ 2021-08-10 18:27 UTC (permalink / raw)
To: dev
https://bugs.dpdk.org/show_bug.cgi?id=788
Bug ID: 788
Summary: i40e: 16BYTE_RX_DESC build broken on FreeBSD-13
Product: DPDK
Version: 21.08
Hardware: x86
OS: FreeBSD
Status: UNCONFIRMED
Severity: normal
Priority: Normal
Component: ethdev
Assignee: dev@dpdk.org
Reporter: brian90013@gmail.com
Target Milestone: ---
Hello,
I just tried compiling DPDK 21.08 and found my configuration no longer builds
on FreeBSD-13.0. With version 21.05, I defined RTE_LIBRTE_I40E_16BYTE_RX_DESC
in rte_config.h as described in section "Use 16 Bytes RX Descriptor Size" of
the current i40e PMD documentation. I also defined a similar variable
RTE_LIBRTE_ICE_16BYTE_RX_DESC in rte_config.h for the ice PMD.
This morning I brought in version 21.08 and watched it compile on FreeBSD-12.2
(clang version 10.0.1) running on an 'Intel(R) Xeon(R) CPU E5-2637 v3'. Then I
tried building it on FreeBSD-13.0 (clang version 11.0.1) on a 'AMD Ryzen
Threadripper 3990X 64-Core Processor' but the build died with a number of
compilation errors related to avx512f features enabled in functions compiled
without support for avx512f.
Below I have an edited build log from the FreeBSD-12.2 system that works
followed by the log from the FreeBSD-13.0 system that fails. Looking at the
12.2 log, there is a warning “Binutils error with AVX512 assembly, disabling
AVX512 support” that might be hiding this issue? Neither system has hardware
support for AVX-512 but it appears that the compiler does. Thank you for your
help!
*** FreeBSD-12.2 build that works ***
The Meson build system
Version: 0.58.1
Build type: native build
Program cat found: YES (/bin/cat)
Project name: DPDK
Project version: 21.08.0
C compiler for the host machine: cc (clang 10.0.1 "FreeBSD clang version 10.0.1
(git@github.com:llvm/llvm-project.git llvmorg-10.0.1-0-gef32c611aa2)")
C linker for the host machine: cc ld.lld 10.0.1
Host machine cpu family: x86_64
Host machine cpu: x86_64
Compiler for C supports arguments -mno-avx512f: YES
config/x86/meson.build:9: WARNING: Binutils error with AVX512 assembly,
disabling AVX512 support
Compiler for C supports arguments -mavx512f: YES
Checking if "AVX512 checking" compiles: YES
Fetching value of define "__SSE4_2__" : 1
Fetching value of define "__AES__" : 1
Fetching value of define "__AVX__" : 1
Fetching value of define "__AVX2__" : 1
Fetching value of define "__AVX512BW__" :
Fetching value of define "__AVX512CD__" :
Fetching value of define "__AVX512DQ__" :
Fetching value of define "__AVX512F__" :
Fetching value of define "__AVX512VL__" :
Fetching value of define "__PCLMUL__" : 1
Fetching value of define "__RDRND__" : 1
Fetching value of define "__RDSEED__" :
Fetching value of define "__VPCLMULQDQ__" :
Compiler for C supports arguments -mpclmul: YES
Compiler for C supports arguments -maes: YES
*** FreeBSD-13.0 system that does not build ***
The Meson build system
Version: 0.58.1
Build type: native build
Program cat found: YES (/bin/cat)
Project name: DPDK
Project version: 21.08.0
C compiler for the host machine: cc (clang 11.0.1 "FreeBSD clang version 11.0.1
(git@github.com:llvm/llvm-project.git llvmorg-11.0.1-0-g43ff75f2c3fe)")
C linker for the host machine: cc ld.lld 11.0.1
Host machine cpu family: x86_64
Host machine cpu: x86_64
Compiler for C supports arguments -mavx512f: YES
Checking if "AVX512 checking" compiles: YES
Fetching value of define "__SSE4_2__" : 1
Fetching value of define "__AES__" : 1
Fetching value of define "__AVX__" : 1
Fetching value of define "__AVX2__" : 1
Fetching value of define "__AVX512BW__" :
Fetching value of define "__AVX512CD__" :
Fetching value of define "__AVX512DQ__" :
Fetching value of define "__AVX512F__" :
Fetching value of define "__AVX512VL__" :
Fetching value of define "__PCLMUL__" : 1
Fetching value of define "__RDRND__" : 1
Fetching value of define "__RDSEED__" : 1
Fetching value of define "__VPCLMULQDQ__" :
Compiler for C supports arguments -mpclmul: YES
Compiler for C supports arguments -maes: YES
Compiler for C supports arguments -mavx512f: YES (cached)
Compiler for C supports arguments -mavx512bw: YES
Compiler for C supports arguments -mavx512dq: YES
Compiler for C supports arguments -mavx512vl: YES
Compiler for C supports arguments -mvpclmulqdq: YES
Compiler for C supports arguments -mavx2: YES
Compiler for C supports arguments -mavx: YES
Compiler for C supports arguments -mavx512f -mavx512vl -mavx512cd -mavx512bw:
YES
Compiler for C supports arguments -mavx512f -mavx512dq: YES
FAILED: drivers/libtmp_rte_net_i40e.a.p/net_i40e_i40e_rxtx_vec_avx2.c.o
cc -Idrivers/libtmp_rte_net_i40e.a.p -Idrivers -I../drivers -Idrivers/net/i40e
-I../drivers/net/i40e -Idrivers/net/i40e/base -I../drivers/net/i40e/base
-Ilib/ethdev -I../lib/ethdev -I. -I.. -Iconfig -I../config -Ilib/eal/include
-I../lib/eal/include -Ilib/eal/freebsd/include -I../lib/eal/freebsd/include
-Ilib/eal/x86/include -I../lib/eal/x86/include -Ilib/eal/common
-I../lib/eal/common -Ilib/eal -I../lib/eal -Ilib/kvargs -I../lib/kvargs
-Ilib/metrics -I../lib/metrics -Ilib/telemetry -I../lib/telemetry -Ilib/net
-I../lib/net -Ilib/mbuf -I../lib/mbuf -Ilib/mempool -I../lib/mempool -Ilib/ring
-I../lib/ring -Ilib/meter -I../lib/meter -Idrivers/bus/pci -I../drivers/bus/pci
-I../drivers/bus/pci/bsd -Ilib/pci -I../lib/pci -Idrivers/bus/vdev
-I../drivers/bus/vdev -Ilib/hash -I../lib/hash -Ilib/rcu -I../lib/rcu
-fcolor-diagnostics -D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -O3 -include
rte_config.h -Wextra -Wcast-qual -Wdeprecated -Wformat -Wformat-nonliteral
-Wformat-security -Wmissing-declarations -Wmissing-prototypes -Wnested-externs
-Wold-style-definition -Wpointer-arith -Wsign-compare -Wstrict-prototypes
-Wundef -Wwrite-strings -Wno-address-of-packed-member
-Wno-missing-field-initializers -D_GNU_SOURCE -D__BSD_VISIBLE -fPIC
-march=native -DALLOW_EXPERIMENTAL_API -DALLOW_INTERNAL_API -DPF_DRIVER
-DVF_DRIVER -DINTEGRATED_VF -DX722_A0_SUPPORT -DCC_AVX2_SUPPORT
-DCC_AVX512_SUPPORT -DRTE_LOG_DEFAULT_LOGTYPE=pmd.net.i40e -MD -MQ
drivers/libtmp_rte_net_i40e.a.p/net_i40e_i40e_rxtx_vec_avx2.c.o -MF
drivers/libtmp_rte_net_i40e.a.p/net_i40e_i40e_rxtx_vec_avx2.c.o.d -o
drivers/libtmp_rte_net_i40e.a.p/net_i40e_i40e_rxtx_vec_avx2.c.o -c
../drivers/net/i40e/i40e_rxtx_vec_avx2.c
In file included from ../drivers/net/i40e/i40e_rxtx_vec_avx2.c:13:
../drivers/net/i40e/i40e_rxtx_vec_common.h:337:22: error: always_inline
function '_mm512_set1_epi64' requires target feature 'avx512f', but would be
inlined into function 'i40e_rxq_rearm_common' that is compiled without support
for 'avx512f'
__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:337:22: error: AVX vector return of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:385:5: error:
'__builtin_ia32_inserti64x4' needs target feature avx512f
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
^
/usr/lib/clang/11.0.1/include/avx512fintrin.h:7413:12: note: expanded from
macro '_mm512_inserti64x4'
(__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
^
In file included from ../drivers/net/i40e/i40e_rxtx_vec_avx2.c:13:
../drivers/net/i40e/i40e_rxtx_vec_common.h:385:24: error: always_inline
function '_mm512_castsi256_si512' requires target feature 'avx512f', but would
be inlined into function 'i40e_rxq_rearm_common' that is compiled without
support for 'avx512f'
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:385:24: error: AVX vector return of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:388:5: error:
'__builtin_ia32_inserti64x4' needs target feature avx512f
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
^
/usr/lib/clang/11.0.1/include/avx512fintrin.h:7413:12: note: expanded from
macro '_mm512_inserti64x4'
(__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
^
In file included from ../drivers/net/i40e/i40e_rxtx_vec_avx2.c:13:
../drivers/net/i40e/i40e_rxtx_vec_common.h:388:24: error: always_inline
function '_mm512_castsi256_si512' requires target feature 'avx512f', but would
be inlined into function 'i40e_rxq_rearm_common' that is compiled without
support for 'avx512f'
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:388:24: error: AVX vector return of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:392:18: error: always_inline
function '_mm512_unpackhi_epi64' requires target feature 'avx512f', but would
be inlined into function 'i40e_rxq_rearm_common' that is compiled without
support for 'avx512f'
dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3,
vaddr0_3);
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:392:18: error: AVX vector argument
of type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:393:18: error: always_inline
function '_mm512_unpackhi_epi64' requires target feature 'avx512f', but would
be inlined into function 'i40e_rxq_rearm_common' that is compiled without
support for 'avx512f'
dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7,
vaddr4_7);
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:393:18: error: AVX vector argument
of type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:396:18: error: always_inline
function '_mm512_add_epi64' requires target feature 'avx512f', but would be
inlined into function 'i40e_rxq_rearm_common' that is compiled without support
for 'avx512f'
dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:396:18: error: AVX vector argument
of type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:397:18: error: always_inline
function '_mm512_add_epi64' requires target feature 'avx512f', but would be
inlined into function 'i40e_rxq_rearm_common' that is compiled without support
for 'avx512f'
dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:397:18: error: AVX vector argument
of type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:400:4: error: always_inline function
'_mm512_store_si512' requires target feature 'avx512f', but would be inlined
into function 'i40e_rxq_rearm_common' that is compiled without support for
'avx512f'
_mm512_store_si512((__m512i *)&rxdp->read,
dma_addr0_3);
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:400:4: error: AVX vector argument of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:401:4: error: always_inline function
'_mm512_store_si512' requires target feature 'avx512f', but would be inlined
into function 'i40e_rxq_rearm_common' that is compiled without support for
'avx512f'
_mm512_store_si512((__m512i *)&(rxdp + 4)->read,
dma_addr4_7);
^
fatal error: too many errors emitted, stopping now [-ferror-limit=]
20 errors generated.
[971/1893] Compiling C object
drivers/libtmp_rte_net_ice.a.p/net_ice_ice_rxtx_vec_avx2.c.o
FAILED: drivers/libtmp_rte_net_ice.a.p/net_ice_ice_rxtx_vec_avx2.c.o
cc -Idrivers/libtmp_rte_net_ice.a.p -Idrivers -I../drivers -Idrivers/net/ice
-I../drivers/net/ice -Idrivers/net/ice/base -I../drivers/net/ice/base
-Idrivers/common/iavf -I../drivers/common/iavf -Ilib/ethdev -I../lib/ethdev -I.
-I.. -Iconfig -I../config -Ilib/eal/include -I../lib/eal/include
-Ilib/eal/freebsd/include -I../lib/eal/freebsd/include -Ilib/eal/x86/include
-I../lib/eal/x86/include -Ilib/eal/common -I../lib/eal/common -Ilib/eal
-I../lib/eal -Ilib/kvargs -I../lib/kvargs -Ilib/metrics -I../lib/metrics
-Ilib/telemetry -I../lib/telemetry -Ilib/net -I../lib/net -Ilib/mbuf
-I../lib/mbuf -Ilib/mempool -I../lib/mempool -Ilib/ring -I../lib/ring
-Ilib/meter -I../lib/meter -Idrivers/bus/pci -I../drivers/bus/pci
-I../drivers/bus/pci/bsd -Ilib/pci -I../lib/pci -Idrivers/bus/vdev
-I../drivers/bus/vdev -Ilib/hash -I../lib/hash -Ilib/rcu -I../lib/rcu
-fcolor-diagnostics -D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -O3 -include
rte_config.h -Wextra -Wcast-qual -Wdeprecated -Wformat -Wformat-nonliteral
-Wformat-security -Wmissing-declarations -Wmissing-prototypes -Wnested-externs
-Wold-style-definition -Wpointer-arith -Wsign-compare -Wstrict-prototypes
-Wundef -Wwrite-strings -Wno-address-of-packed-member
-Wno-missing-field-initializers -D_GNU_SOURCE -D__BSD_VISIBLE -fPIC
-march=native -DALLOW_EXPERIMENTAL_API -DALLOW_INTERNAL_API -DCC_AVX2_SUPPORT
-DCC_AVX512_SUPPORT -DRTE_LOG_DEFAULT_LOGTYPE=pmd.net.ice -MD -MQ
drivers/libtmp_rte_net_ice.a.p/net_ice_ice_rxtx_vec_avx2.c.o -MF
drivers/libtmp_rte_net_ice.a.p/net_ice_ice_rxtx_vec_avx2.c.o.d -o
drivers/libtmp_rte_net_ice.a.p/net_ice_ice_rxtx_vec_avx2.c.o -c
../drivers/net/ice/ice_rxtx_vec_avx2.c
In file included from ../drivers/net/ice/ice_rxtx_vec_avx2.c:5:
../drivers/net/ice/ice_rxtx_vec_common.h:422:22: error: always_inline function
'_mm512_set1_epi64' requires target feature 'avx512f', but would be inlined
into function 'ice_rxq_rearm_common' that is compiled without support for
'avx512f'
__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
^
../drivers/net/ice/ice_rxtx_vec_common.h:422:22: error: AVX vector return of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:470:5: error:
'__builtin_ia32_inserti64x4' needs target feature avx512f
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
^
/usr/lib/clang/11.0.1/include/avx512fintrin.h:7413:12: note: expanded from
macro '_mm512_inserti64x4'
(__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
^
In file included from ../drivers/net/ice/ice_rxtx_vec_avx2.c:5:
../drivers/net/ice/ice_rxtx_vec_common.h:470:24: error: always_inline function
'_mm512_castsi256_si512' requires target feature 'avx512f', but would be
inlined into function 'ice_rxq_rearm_common' that is compiled without support
for 'avx512f'
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
^
../drivers/net/ice/ice_rxtx_vec_common.h:470:24: error: AVX vector return of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:473:5: error:
'__builtin_ia32_inserti64x4' needs target feature avx512f
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
^
/usr/lib/clang/11.0.1/include/avx512fintrin.h:7413:12: note: expanded from
macro '_mm512_inserti64x4'
(__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
^
In file included from ../drivers/net/ice/ice_rxtx_vec_avx2.c:5:
../drivers/net/ice/ice_rxtx_vec_common.h:473:24: error: always_inline function
'_mm512_castsi256_si512' requires target feature 'avx512f', but would be
inlined into function 'ice_rxq_rearm_common' that is compiled without support
for 'avx512f'
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
^
../drivers/net/ice/ice_rxtx_vec_common.h:473:24: error: AVX vector return of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:477:18: error: always_inline function
'_mm512_unpackhi_epi64' requires target feature 'avx512f', but would be inlined
into function 'ice_rxq_rearm_common' that is compiled without support for
'avx512f'
dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3,
vaddr0_3);
^
../drivers/net/ice/ice_rxtx_vec_common.h:477:18: error: AVX vector argument of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:478:18: error: always_inline function
'_mm512_unpackhi_epi64' requires target feature 'avx512f', but would be inlined
into function 'ice_rxq_rearm_common' that is compiled without support for
'avx512f'
dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7,
vaddr4_7);
^
../drivers/net/ice/ice_rxtx_vec_common.h:478:18: error: AVX vector argument of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:481:18: error: always_inline function
'_mm512_add_epi64' requires target feature 'avx512f', but would be inlined into
function 'ice_rxq_rearm_common' that is compiled without support for 'avx512f'
dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
^
../drivers/net/ice/ice_rxtx_vec_common.h:481:18: error: AVX vector argument of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:482:18: error: always_inline function
'_mm512_add_epi64' requires target feature 'avx512f', but would be inlined into
function 'ice_rxq_rearm_common' that is compiled without support for 'avx512f'
dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
^
../drivers/net/ice/ice_rxtx_vec_common.h:482:18: error: AVX vector argument of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:485:4: error: always_inline function
'_mm512_store_si512' requires target feature 'avx512f', but would be inlined
into function 'ice_rxq_rearm_common' that is compiled without support for
'avx512f'
_mm512_store_si512((__m512i *)&rxdp->read,
dma_addr0_3);
^
../drivers/net/ice/ice_rxtx_vec_common.h:485:4: error: AVX vector argument of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:486:4: error: always_inline function
'_mm512_store_si512' requires target feature 'avx512f', but would be inlined
into function 'ice_rxq_rearm_common' that is compiled without support for
'avx512f'
_mm512_store_si512((__m512i *)&(rxdp + 4)->read,
dma_addr4_7);
^
fatal error: too many errors emitted, stopping now [-ferror-limit=]
20 errors generated.
[998/1893] Compiling C object
drivers/libtmp_rte_net_ice.a.p/net_ice_ice_rxtx.c.o
../drivers/net/ice/ice_rxtx.c:129:60: warning: unused parameter 'rxq'
[-Wunused-parameter]
ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ice_rx_queue *rxq,
^
../drivers/net/ice/ice_rxtx.c:171:60: warning: unused parameter 'rxq'
[-Wunused-parameter]
ice_rxd_to_pkt_fields_by_comms_aux_v2(struct ice_rx_queue *rxq,
^
2 warnings generated.
[1006/1893] Compiling C object
lib/librte_pipeline.a.p/pipeline_rte_table_action.c.o
ninja: build stopped: subcommand failed.
[109/890] Compiling C object
drivers/libtmp_rte_net_i40e.a.p/net_i40e_i40e_rxtx_vec_avx2.c.o
FAILED: drivers/libtmp_rte_net_i40e.a.p/net_i40e_i40e_rxtx_vec_avx2.c.o
cc -Idrivers/libtmp_rte_net_i40e.a.p -Idrivers -I../drivers -Idrivers/net/i40e
-I../drivers/net/i40e -Idrivers/net/i40e/base -I../drivers/net/i40e/base
-Ilib/ethdev -I../lib/ethdev -I. -I.. -Iconfig -I../config -Ilib/eal/include
-I../lib/eal/include -Ilib/eal/freebsd/include -I../lib/eal/freebsd/include
-Ilib/eal/x86/include -I../lib/eal/x86/include -Ilib/eal/common
-I../lib/eal/common -Ilib/eal -I../lib/eal -Ilib/kvargs -I../lib/kvargs
-Ilib/metrics -I../lib/metrics -Ilib/telemetry -I../lib/telemetry -Ilib/net
-I../lib/net -Ilib/mbuf -I../lib/mbuf -Ilib/mempool -I../lib/mempool -Ilib/ring
-I../lib/ring -Ilib/meter -I../lib/meter -Idrivers/bus/pci -I../drivers/bus/pci
-I../drivers/bus/pci/bsd -Ilib/pci -I../lib/pci -Idrivers/bus/vdev
-I../drivers/bus/vdev -Ilib/hash -I../lib/hash -Ilib/rcu -I../lib/rcu
-fcolor-diagnostics -D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -O3 -include
rte_config.h -Wextra -Wcast-qual -Wdeprecated -Wformat -Wformat-nonliteral
-Wformat-security -Wmissing-declarations -Wmissing-prototypes -Wnested-externs
-Wold-style-definition -Wpointer-arith -Wsign-compare -Wstrict-prototypes
-Wundef -Wwrite-strings -Wno-address-of-packed-member
-Wno-missing-field-initializers -D_GNU_SOURCE -D__BSD_VISIBLE -fPIC
-march=native -DALLOW_EXPERIMENTAL_API -DALLOW_INTERNAL_API -DPF_DRIVER
-DVF_DRIVER -DINTEGRATED_VF -DX722_A0_SUPPORT -DCC_AVX2_SUPPORT
-DCC_AVX512_SUPPORT -DRTE_LOG_DEFAULT_LOGTYPE=pmd.net.i40e -MD -MQ
drivers/libtmp_rte_net_i40e.a.p/net_i40e_i40e_rxtx_vec_avx2.c.o -MF
drivers/libtmp_rte_net_i40e.a.p/net_i40e_i40e_rxtx_vec_avx2.c.o.d -o
drivers/libtmp_rte_net_i40e.a.p/net_i40e_i40e_rxtx_vec_avx2.c.o -c
../drivers/net/i40e/i40e_rxtx_vec_avx2.c
In file included from ../drivers/net/i40e/i40e_rxtx_vec_avx2.c:13:
../drivers/net/i40e/i40e_rxtx_vec_common.h:337:22: error: always_inline
function '_mm512_set1_epi64' requires target feature 'avx512f', but would be
inlined into function 'i40e_rxq_rearm_common' that is compiled without support
for 'avx512f'
__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:337:22: error: AVX vector return of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:385:5: error:
'__builtin_ia32_inserti64x4' needs target feature avx512f
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
^
/usr/lib/clang/11.0.1/include/avx512fintrin.h:7413:12: note: expanded from
macro '_mm512_inserti64x4'
(__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
^
In file included from ../drivers/net/i40e/i40e_rxtx_vec_avx2.c:13:
../drivers/net/i40e/i40e_rxtx_vec_common.h:385:24: error: always_inline
function '_mm512_castsi256_si512' requires target feature 'avx512f', but would
be inlined into function 'i40e_rxq_rearm_common' that is compiled without
support for 'avx512f'
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:385:24: error: AVX vector return of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:388:5: error:
'__builtin_ia32_inserti64x4' needs target feature avx512f
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
^
/usr/lib/clang/11.0.1/include/avx512fintrin.h:7413:12: note: expanded from
macro '_mm512_inserti64x4'
(__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
^
In file included from ../drivers/net/i40e/i40e_rxtx_vec_avx2.c:13:
../drivers/net/i40e/i40e_rxtx_vec_common.h:388:24: error: always_inline
function '_mm512_castsi256_si512' requires target feature 'avx512f', but would
be inlined into function 'i40e_rxq_rearm_common' that is compiled without
support for 'avx512f'
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:388:24: error: AVX vector return of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:392:18: error: always_inline
function '_mm512_unpackhi_epi64' requires target feature 'avx512f', but would
be inlined into function 'i40e_rxq_rearm_common' that is compiled without
support for 'avx512f'
dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3,
vaddr0_3);
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:392:18: error: AVX vector argument
of type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:393:18: error: always_inline
function '_mm512_unpackhi_epi64' requires target feature 'avx512f', but would
be inlined into function 'i40e_rxq_rearm_common' that is compiled without
support for 'avx512f'
dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7,
vaddr4_7);
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:393:18: error: AVX vector argument
of type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:396:18: error: always_inline
function '_mm512_add_epi64' requires target feature 'avx512f', but would be
inlined into function 'i40e_rxq_rearm_common' that is compiled without support
for 'avx512f'
dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:396:18: error: AVX vector argument
of type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:397:18: error: always_inline
function '_mm512_add_epi64' requires target feature 'avx512f', but would be
inlined into function 'i40e_rxq_rearm_common' that is compiled without support
for 'avx512f'
dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:397:18: error: AVX vector argument
of type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:400:4: error: always_inline function
'_mm512_store_si512' requires target feature 'avx512f', but would be inlined
into function 'i40e_rxq_rearm_common' that is compiled without support for
'avx512f'
_mm512_store_si512((__m512i *)&rxdp->read,
dma_addr0_3);
^
../drivers/net/i40e/i40e_rxtx_vec_common.h:400:4: error: AVX vector argument of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/i40e/i40e_rxtx_vec_common.h:401:4: error: always_inline function
'_mm512_store_si512' requires target feature 'avx512f', but would be inlined
into function 'i40e_rxq_rearm_common' that is compiled without support for
'avx512f'
_mm512_store_si512((__m512i *)&(rxdp + 4)->read,
dma_addr4_7);
^
fatal error: too many errors emitted, stopping now [-ferror-limit=]
20 errors generated.
[119/890] Compiling C object
drivers/libtmp_rte_net_ice.a.p/net_ice_ice_rxtx_vec_avx2.c.o
FAILED: drivers/libtmp_rte_net_ice.a.p/net_ice_ice_rxtx_vec_avx2.c.o
cc -Idrivers/libtmp_rte_net_ice.a.p -Idrivers -I../drivers -Idrivers/net/ice
-I../drivers/net/ice -Idrivers/net/ice/base -I../drivers/net/ice/base
-Idrivers/common/iavf -I../drivers/common/iavf -Ilib/ethdev -I../lib/ethdev -I.
-I.. -Iconfig -I../config -Ilib/eal/include -I../lib/eal/include
-Ilib/eal/freebsd/include -I../lib/eal/freebsd/include -Ilib/eal/x86/include
-I../lib/eal/x86/include -Ilib/eal/common -I../lib/eal/common -Ilib/eal
-I../lib/eal -Ilib/kvargs -I../lib/kvargs -Ilib/metrics -I../lib/metrics
-Ilib/telemetry -I../lib/telemetry -Ilib/net -I../lib/net -Ilib/mbuf
-I../lib/mbuf -Ilib/mempool -I../lib/mempool -Ilib/ring -I../lib/ring
-Ilib/meter -I../lib/meter -Idrivers/bus/pci -I../drivers/bus/pci
-I../drivers/bus/pci/bsd -Ilib/pci -I../lib/pci -Idrivers/bus/vdev
-I../drivers/bus/vdev -Ilib/hash -I../lib/hash -Ilib/rcu -I../lib/rcu
-fcolor-diagnostics -D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -O3 -include
rte_config.h -Wextra -Wcast-qual -Wdeprecated -Wformat -Wformat-nonliteral
-Wformat-security -Wmissing-declarations -Wmissing-prototypes -Wnested-externs
-Wold-style-definition -Wpointer-arith -Wsign-compare -Wstrict-prototypes
-Wundef -Wwrite-strings -Wno-address-of-packed-member
-Wno-missing-field-initializers -D_GNU_SOURCE -D__BSD_VISIBLE -fPIC
-march=native -DALLOW_EXPERIMENTAL_API -DALLOW_INTERNAL_API -DCC_AVX2_SUPPORT
-DCC_AVX512_SUPPORT -DRTE_LOG_DEFAULT_LOGTYPE=pmd.net.ice -MD -MQ
drivers/libtmp_rte_net_ice.a.p/net_ice_ice_rxtx_vec_avx2.c.o -MF
drivers/libtmp_rte_net_ice.a.p/net_ice_ice_rxtx_vec_avx2.c.o.d -o
drivers/libtmp_rte_net_ice.a.p/net_ice_ice_rxtx_vec_avx2.c.o -c
../drivers/net/ice/ice_rxtx_vec_avx2.c
In file included from ../drivers/net/ice/ice_rxtx_vec_avx2.c:5:
../drivers/net/ice/ice_rxtx_vec_common.h:422:22: error: always_inline function
'_mm512_set1_epi64' requires target feature 'avx512f', but would be inlined
into function 'ice_rxq_rearm_common' that is compiled without support for
'avx512f'
__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
^
../drivers/net/ice/ice_rxtx_vec_common.h:422:22: error: AVX vector return of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:470:5: error:
'__builtin_ia32_inserti64x4' needs target feature avx512f
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
^
/usr/lib/clang/11.0.1/include/avx512fintrin.h:7413:12: note: expanded from
macro '_mm512_inserti64x4'
(__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
^
In file included from ../drivers/net/ice/ice_rxtx_vec_avx2.c:5:
../drivers/net/ice/ice_rxtx_vec_common.h:470:24: error: always_inline function
'_mm512_castsi256_si512' requires target feature 'avx512f', but would be
inlined into function 'ice_rxq_rearm_common' that is compiled without support
for 'avx512f'
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
^
../drivers/net/ice/ice_rxtx_vec_common.h:470:24: error: AVX vector return of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:473:5: error:
'__builtin_ia32_inserti64x4' needs target feature avx512f
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
^
/usr/lib/clang/11.0.1/include/avx512fintrin.h:7413:12: note: expanded from
macro '_mm512_inserti64x4'
(__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
^
In file included from ../drivers/net/ice/ice_rxtx_vec_avx2.c:5:
../drivers/net/ice/ice_rxtx_vec_common.h:473:24: error: always_inline function
'_mm512_castsi256_si512' requires target feature 'avx512f', but would be
inlined into function 'ice_rxq_rearm_common' that is compiled without support
for 'avx512f'
_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
^
../drivers/net/ice/ice_rxtx_vec_common.h:473:24: error: AVX vector return of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:477:18: error: always_inline function
'_mm512_unpackhi_epi64' requires target feature 'avx512f', but would be inlined
into function 'ice_rxq_rearm_common' that is compiled without support for
'avx512f'
dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3,
vaddr0_3);
^
../drivers/net/ice/ice_rxtx_vec_common.h:477:18: error: AVX vector argument of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:478:18: error: always_inline function
'_mm512_unpackhi_epi64' requires target feature 'avx512f', but would be inlined
into function 'ice_rxq_rearm_common' that is compiled without support for
'avx512f'
dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7,
vaddr4_7);
^
../drivers/net/ice/ice_rxtx_vec_common.h:478:18: error: AVX vector argument of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:481:18: error: always_inline function
'_mm512_add_epi64' requires target feature 'avx512f', but would be inlined into
function 'ice_rxq_rearm_common' that is compiled without support for 'avx512f'
dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
^
../drivers/net/ice/ice_rxtx_vec_common.h:481:18: error: AVX vector argument of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:482:18: error: always_inline function
'_mm512_add_epi64' requires target feature 'avx512f', but would be inlined into
function 'ice_rxq_rearm_common' that is compiled without support for 'avx512f'
dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
^
../drivers/net/ice/ice_rxtx_vec_common.h:482:18: error: AVX vector argument of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:485:4: error: always_inline function
'_mm512_store_si512' requires target feature 'avx512f', but would be inlined
into function 'ice_rxq_rearm_common' that is compiled without support for
'avx512f'
_mm512_store_si512((__m512i *)&rxdp->read,
dma_addr0_3);
^
../drivers/net/ice/ice_rxtx_vec_common.h:485:4: error: AVX vector argument of
type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled
changes the ABI
../drivers/net/ice/ice_rxtx_vec_common.h:486:4: error: always_inline function
'_mm512_store_si512' requires target feature 'avx512f', but would be inlined
into function 'ice_rxq_rearm_common' that is compiled without support for
'avx512f'
_mm512_store_si512((__m512i *)&(rxdp + 4)->read,
dma_addr4_7);
^
fatal error: too many errors emitted, stopping now [-ferror-limit=]
20 errors generated.
ninja: build stopped: subcommand failed.
--
You are receiving this mail because:
You are the assignee for the bug.
^ permalink raw reply [flat|nested] 12+ messages in thread
* [dpdk-dev] [PATCH 1/2] net/i40e: fix generic build on FreeBSD
2021-08-10 18:27 [dpdk-dev] [Bug 788] i40e: 16BYTE_RX_DESC build broken on FreeBSD-13 bugzilla
@ 2021-08-18 16:38 ` Bruce Richardson
2021-08-18 16:42 ` Bruce Richardson
2021-09-01 6:23 ` Zhang, Qi Z
0 siblings, 2 replies; 12+ messages in thread
From: Bruce Richardson @ 2021-08-18 16:38 UTC (permalink / raw)
To: dev; +Cc: brian90013, Bruce Richardson, wenzhuo.lu, stable, Beilei Xing
The common header file for vectorization is included in multiple files,
and so must use macros for the current compilation unit, rather than the
compiler-capability flag set for the whole driver. With the current,
incorrect, macro, the AVX512 or AVX2 flags may be set when compiling up
SSE code, leading to compilation errors. Changing from "CC_AVX*_SUPPORT"
to the compiler-defined "__AVX*__" macros fixes this issue.
Bugzilla ID: 788
Fixes: 0604b1f2208f ("net/i40e: fix crash in AVX512")
Cc: wenzhuo.lu@intel.com
Cc: stable@dpdk.org
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/i40e/i40e_rxtx_vec_common.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index f52ed98d62..65715ed1ce 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -268,7 +268,7 @@ i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
#endif
}
-#ifdef CC_AVX2_SUPPORT
+#ifdef __AVX2__
static __rte_always_inline void
i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
{
@@ -329,7 +329,7 @@ i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
}
#else
-#ifdef CC_AVX512_SUPPORT
+#ifdef __AVX512VL__
if (avx512) {
struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
--
2.30.2
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] net/i40e: fix generic build on FreeBSD
2021-08-18 16:38 ` [dpdk-dev] [PATCH 1/2] net/i40e: fix generic build on FreeBSD Bruce Richardson
@ 2021-08-18 16:42 ` Bruce Richardson
2021-09-03 10:00 ` Ferruh Yigit
2021-09-01 6:23 ` Zhang, Qi Z
1 sibling, 1 reply; 12+ messages in thread
From: Bruce Richardson @ 2021-08-18 16:42 UTC (permalink / raw)
To: dev; +Cc: wenzhuo.lu, Beilei Xing
On Wed, Aug 18, 2021 at 05:38:15PM +0100, Bruce Richardson wrote:
> The common header file for vectorization is included in multiple files,
> and so must use macros for the current compilation unit, rather than the
> compiler-capability flag set for the whole driver. With the current,
> incorrect, macro, the AVX512 or AVX2 flags may be set when compiling up
> SSE code, leading to compilation errors. Changing from "CC_AVX*_SUPPORT"
> to the compiler-defined "__AVX*__" macros fixes this issue.
>
> Bugzilla ID: 788
> Fixes: 0604b1f2208f ("net/i40e: fix crash in AVX512")
> Cc: wenzhuo.lu@intel.com
> Cc: stable@dpdk.org
>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
> drivers/net/i40e/i40e_rxtx_vec_common.h | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
> index f52ed98d62..65715ed1ce 100644
> --- a/drivers/net/i40e/i40e_rxtx_vec_common.h
> +++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
> @@ -268,7 +268,7 @@ i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
> #endif
> }
>
> -#ifdef CC_AVX2_SUPPORT
> +#ifdef __AVX2__
> static __rte_always_inline void
> i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
> {
On a higher-level, I'd suggest we look to remove the use of these macros
and AVX code in general from this header file (and ice driver equivalent).
IIRC this file was originally meant to contain only the "common" code i.e.
the scalar code, to be shared among vector implementations. Having AVX code
in this file can lead to these sorts of bugs and just makes the file no
longer truely common. The code in question here, should probably go in a
"common_avx" header, which means that we can remove the AVX2 conditions
from it etc.
/Bruce
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] net/i40e: fix generic build on FreeBSD
2021-08-18 16:42 ` Bruce Richardson
@ 2021-09-03 10:00 ` Ferruh Yigit
0 siblings, 0 replies; 12+ messages in thread
From: Ferruh Yigit @ 2021-09-03 10:00 UTC (permalink / raw)
To: Bruce Richardson, dev; +Cc: wenzhuo.lu, Beilei Xing
On 8/18/2021 5:42 PM, Bruce Richardson wrote:
> On Wed, Aug 18, 2021 at 05:38:15PM +0100, Bruce Richardson wrote:
>> The common header file for vectorization is included in multiple files,
>> and so must use macros for the current compilation unit, rather than the
>> compiler-capability flag set for the whole driver. With the current,
>> incorrect, macro, the AVX512 or AVX2 flags may be set when compiling up
>> SSE code, leading to compilation errors. Changing from "CC_AVX*_SUPPORT"
>> to the compiler-defined "__AVX*__" macros fixes this issue.
>>
>> Bugzilla ID: 788
>> Fixes: 0604b1f2208f ("net/i40e: fix crash in AVX512")
>> Cc: wenzhuo.lu@intel.com
>> Cc: stable@dpdk.org
>>
>> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
>> ---
>> drivers/net/i40e/i40e_rxtx_vec_common.h | 4 ++--
>> 1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
>> index f52ed98d62..65715ed1ce 100644
>> --- a/drivers/net/i40e/i40e_rxtx_vec_common.h
>> +++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
>> @@ -268,7 +268,7 @@ i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
>> #endif
>> }
>>
>> -#ifdef CC_AVX2_SUPPORT
>> +#ifdef __AVX2__
>> static __rte_always_inline void
>> i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
>> {
>
> On a higher-level, I'd suggest we look to remove the use of these macros
> and AVX code in general from this header file (and ice driver equivalent).
> IIRC this file was originally meant to contain only the "common" code i.e.
> the scalar code, to be shared among vector implementations. Having AVX code
> in this file can lead to these sorts of bugs and just makes the file no
> longer truely common. The code in question here, should probably go in a
> "common_avx" header, which means that we can remove the AVX2 conditions
> from it etc.
>
Indeed I come to this thread to make exact same comment, I think better to move
avx specific code into a "common_avx" header.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] net/i40e: fix generic build on FreeBSD
2021-08-18 16:38 ` [dpdk-dev] [PATCH 1/2] net/i40e: fix generic build on FreeBSD Bruce Richardson
2021-08-18 16:42 ` Bruce Richardson
@ 2021-09-01 6:23 ` Zhang, Qi Z
1 sibling, 0 replies; 12+ messages in thread
From: Zhang, Qi Z @ 2021-09-01 6:23 UTC (permalink / raw)
To: Richardson, Bruce, dev
Cc: brian90013, Richardson, Bruce, Lu, Wenzhuo, stable, Xing, Beilei
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Bruce Richardson
> Sent: Thursday, August 19, 2021 12:38 AM
> To: dev@dpdk.org
> Cc: brian90013@gmail.com; Richardson, Bruce <bruce.richardson@intel.com>;
> Lu, Wenzhuo <wenzhuo.lu@intel.com>; stable@dpdk.org; Xing, Beilei
> <beilei.xing@intel.com>
> Subject: [dpdk-dev] [PATCH 1/2] net/i40e: fix generic build on FreeBSD
>
> The common header file for vectorization is included in multiple files, and so
> must use macros for the current compilation unit, rather than the
> compiler-capability flag set for the whole driver. With the current, incorrect,
> macro, the AVX512 or AVX2 flags may be set when compiling up SSE code,
> leading to compilation errors. Changing from "CC_AVX*_SUPPORT"
> to the compiler-defined "__AVX*__" macros fixes this issue.
>
> Bugzilla ID: 788
> Fixes: 0604b1f2208f ("net/i40e: fix crash in AVX512")
> Cc: wenzhuo.lu@intel.com
> Cc: stable@dpdk.org
>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
> drivers/net/i40e/i40e_rxtx_vec_common.h | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h
> b/drivers/net/i40e/i40e_rxtx_vec_common.h
> index f52ed98d62..65715ed1ce 100644
> --- a/drivers/net/i40e/i40e_rxtx_vec_common.h
> +++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
> @@ -268,7 +268,7 @@
> i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
> #endif }
>
> -#ifdef CC_AVX2_SUPPORT
> +#ifdef __AVX2__
> static __rte_always_inline void
> i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool
> avx512) { @@ -329,7 +329,7 @@ i40e_rxq_rearm_common(struct
> i40e_rx_queue *rxq, __rte_unused bool avx512)
> _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
> }
> #else
> -#ifdef CC_AVX512_SUPPORT
> +#ifdef __AVX512VL__
> if (avx512) {
> struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
> struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
> --
> 2.30.2
Applied to dpdk-next-net-intel.
Thanks
Qi
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2021-10-19 11:01 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-29 12:13 [dpdk-dev] [PATCH 1/2] net/i40e: fix generic build on FreeBSD Leyi Rong
2021-09-29 12:13 ` [dpdk-dev] [PATCH 2/2] net/ice: " Leyi Rong
2021-10-18 21:35 ` [dpdk-dev] [PATCH 1/2] net/i40e: " Ferruh Yigit
2021-10-19 3:02 ` [dpdk-dev] [PATCH v2 0/2] fix generic build error " Leyi Rong
2021-10-19 3:02 ` [dpdk-dev] [PATCH v2 1/2] net/i40e: fix generic build " Leyi Rong
2021-10-19 3:02 ` [dpdk-dev] [PATCH v2 2/2] net/ice: " Leyi Rong
2021-10-19 9:18 ` [dpdk-dev] [PATCH v2 0/2] fix generic build error " Ferruh Yigit
2021-10-19 11:01 ` Zhang, Qi Z
-- strict thread matches above, loose matches on Subject: below --
2021-08-10 18:27 [dpdk-dev] [Bug 788] i40e: 16BYTE_RX_DESC build broken on FreeBSD-13 bugzilla
2021-08-18 16:38 ` [dpdk-dev] [PATCH 1/2] net/i40e: fix generic build on FreeBSD Bruce Richardson
2021-08-18 16:42 ` Bruce Richardson
2021-09-03 10:00 ` Ferruh Yigit
2021-09-01 6:23 ` Zhang, Qi Z
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).