* [dpdk-dev] [PATCH 1/1] net/i40e: fix compilation failure on core-avx-i
@ 2021-06-22 9:37 Shahed Shaikh
0 siblings, 0 replies; 3+ messages in thread
From: Shahed Shaikh @ 2021-06-22 9:37 UTC (permalink / raw)
To: beilei.xing; +Cc: dev, Shahed Shaikh, stable
i40e_rxtx_vec_sse.c fails to compile with below configuration:
- #define RTE_LIBRTE_I40E_16BYTE_RX_DESC 1 in config/rte_config.h
- cpu=core-axv-i
- gcc which supports -mavx2 (e.g. gcc 4.8.5)
This is because commit 0604b1f2208f ("net/i40e: fix crash in AVX512")
added i40e_rxq_rearm_common() to i40e_rxtx_vec_common.h which is
included by i40e_rxtx_vec_sse.c.
This function is enabled for compilation if CC_AVX2_SUPPORT is defined.
As per drivers/net/i40e/meson.build, CC_AVX2_SUPPORT is defined when
either CPU supports __AVX2__ or compiler supports -mavx2 option.
So for given configuration, CC_AVX2_SUPPORT gets defined but we
don't pass -mavx2 explicitly to gcc while compiling i40e_rxtx_vec_sse.c.
Hence it fails due to avx2 specific code from i40e_rxq_rearm_command().
This patch tries to fix the compilation by moving
i40e_rxq_rearm_common() to a new header file which will only be
included by i40e_rxtx_vec_avx2.c and i40e_rxtx_vec_avx512.c.
Fixes: 0604b1f2208f ("net/i40e: fix crash in AVX512")
Cc: stable@dpdk.org
Signed-off-by: Shahed Shaikh <shaikh@niometrics.com>
---
drivers/net/i40e/i40e_rxtx_vec_avx2.c | 2 +-
drivers/net/i40e/i40e_rxtx_vec_avx512.c | 2 +-
drivers/net/i40e/i40e_rxtx_vec_avx_common.h | 210 ++++++++++++++++++++
drivers/net/i40e/i40e_rxtx_vec_common.h | 201 -------------------
4 files changed, 212 insertions(+), 203 deletions(-)
create mode 100644 drivers/net/i40e/i40e_rxtx_vec_avx_common.h
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
index 3b9eef91a9..2afbb71b75 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
@@ -10,7 +10,7 @@
#include "base/i40e_type.h"
#include "i40e_ethdev.h"
#include "i40e_rxtx.h"
-#include "i40e_rxtx_vec_common.h"
+#include "i40e_rxtx_vec_avx_common.h"
#include <rte_vect.h>
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index bd21d64223..ad225b0e54 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -10,7 +10,7 @@
#include "base/i40e_type.h"
#include "i40e_ethdev.h"
#include "i40e_rxtx.h"
-#include "i40e_rxtx_vec_common.h"
+#include "i40e_rxtx_vec_avx_common.h"
#include <rte_vect.h>
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx_common.h b/drivers/net/i40e/i40e_rxtx_vec_avx_common.h
new file mode 100644
index 0000000000..9f34e52efb
--- /dev/null
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx_common.h
@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#ifndef _I40E_RXTX_VEC_AVX_COMMON_H_
+#define _I40E_RXTX_VEC_AVX_COMMON_H_
+
+#include "i40e_rxtx_vec_common.h"
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+#ifdef CC_AVX2_SUPPORT
+static __rte_always_inline void
+i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
+{
+ int i;
+ uint16_t rx_id;
+ volatile union i40e_rx_desc *rxdp;
+ struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+ rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+ /* Pull 'n' more MBUFs into the software ring */
+ if (rte_mempool_get_bulk(rxq->mp,
+ (void *)rxep,
+ RTE_I40E_RXQ_REARM_THRESH) < 0) {
+ if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+ rxq->nb_rx_desc) {
+ __m128i dma_addr0;
+ dma_addr0 = _mm_setzero_si128();
+ for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+ rxep[i].mbuf = &rxq->fake_mbuf;
+ _mm_store_si128((__m128i *)&rxdp[i].read,
+ dma_addr0);
+ }
+ }
+ rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+ RTE_I40E_RXQ_REARM_THRESH;
+ return;
+ }
+
+#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+ struct rte_mbuf *mb0, *mb1;
+ __m128i dma_addr0, dma_addr1;
+ __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+ RTE_PKTMBUF_HEADROOM);
+ /* Initialize the mbufs in vector, process 2 mbufs in one loop */
+ for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+ __m128i vaddr0, vaddr1;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+ dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+ /* add headroom to pa values */
+ dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+ dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+ /* flush desc with pa dma_addr */
+ _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+ _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+ }
+#else
+#ifdef CC_AVX512_SUPPORT
+ if (avx512) {
+ struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+ struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+ __m512i dma_addr0_3, dma_addr4_7;
+ __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+ /* Initialize the mbufs in vector, process 8 mbufs in one loop */
+ for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+ i += 8, rxep += 8, rxdp += 8) {
+ __m128i vaddr0, vaddr1, vaddr2, vaddr3;
+ __m128i vaddr4, vaddr5, vaddr6, vaddr7;
+ __m256i vaddr0_1, vaddr2_3;
+ __m256i vaddr4_5, vaddr6_7;
+ __m512i vaddr0_3, vaddr4_7;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+ mb2 = rxep[2].mbuf;
+ mb3 = rxep[3].mbuf;
+ mb4 = rxep[4].mbuf;
+ mb5 = rxep[5].mbuf;
+ mb6 = rxep[6].mbuf;
+ mb7 = rxep[7].mbuf;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+ vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+ vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+ vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+ vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+ vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+ vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+ /**
+ * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+ * into the high lanes. Similarly for 2 & 3, and so on.
+ */
+ vaddr0_1 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+ vaddr1, 1);
+ vaddr2_3 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+ vaddr3, 1);
+ vaddr4_5 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+ vaddr5, 1);
+ vaddr6_7 =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+ vaddr7, 1);
+ vaddr0_3 =
+ _mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+ vaddr2_3, 1);
+ vaddr4_7 =
+ _mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+ vaddr6_7, 1);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+ dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+ /* add headroom to pa values */
+ dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+ dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+ /* flush desc with pa dma_addr */
+ _mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+ _mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+ }
+ } else
+#endif
+ {
+ struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+ __m256i dma_addr0_1, dma_addr2_3;
+ __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+ /* Initialize the mbufs in vector, process 4 mbufs in one loop */
+ for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+ i += 4, rxep += 4, rxdp += 4) {
+ __m128i vaddr0, vaddr1, vaddr2, vaddr3;
+ __m256i vaddr0_1, vaddr2_3;
+
+ mb0 = rxep[0].mbuf;
+ mb1 = rxep[1].mbuf;
+ mb2 = rxep[2].mbuf;
+ mb3 = rxep[3].mbuf;
+
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+ offsetof(struct rte_mbuf, buf_addr) + 8);
+ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+ vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+ vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+ /*
+ * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+ * into the high lanes. Similarly for 2 & 3
+ */
+ vaddr0_1 = _mm256_inserti128_si256(
+ _mm256_castsi128_si256(vaddr0), vaddr1, 1);
+ vaddr2_3 = _mm256_inserti128_si256(
+ _mm256_castsi128_si256(vaddr2), vaddr3, 1);
+
+ /* convert pa to dma_addr hdr/data */
+ dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
+ dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
+
+ /* add headroom to pa values */
+ dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
+ dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
+
+ /* flush desc with pa dma_addr */
+ _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
+ _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
+ }
+ }
+
+#endif
+
+ rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+ if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+ rxq->rxrearm_start = 0;
+
+ rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+
+ rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+ (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+ /* Update the tail pointer on the NIC */
+ I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+}
+#endif
+
+#endif
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index 16fcf0aec6..33cebbe88b 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -11,10 +11,6 @@
#include "i40e_ethdev.h"
#include "i40e_rxtx.h"
-#ifndef __INTEL_COMPILER
-#pragma GCC diagnostic ignored "-Wcast-qual"
-#endif
-
static inline uint16_t
reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs,
uint16_t nb_bufs, uint8_t *split_flags)
@@ -256,201 +252,4 @@ i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
return -1;
#endif
}
-
-#ifdef CC_AVX2_SUPPORT
-static __rte_always_inline void
-i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
-{
- int i;
- uint16_t rx_id;
- volatile union i40e_rx_desc *rxdp;
- struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
- rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
- /* Pull 'n' more MBUFs into the software ring */
- if (rte_mempool_get_bulk(rxq->mp,
- (void *)rxep,
- RTE_I40E_RXQ_REARM_THRESH) < 0) {
- if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
- rxq->nb_rx_desc) {
- __m128i dma_addr0;
- dma_addr0 = _mm_setzero_si128();
- for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
- rxep[i].mbuf = &rxq->fake_mbuf;
- _mm_store_si128((__m128i *)&rxdp[i].read,
- dma_addr0);
- }
- }
- rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
- RTE_I40E_RXQ_REARM_THRESH;
- return;
- }
-
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
- struct rte_mbuf *mb0, *mb1;
- __m128i dma_addr0, dma_addr1;
- __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
- RTE_PKTMBUF_HEADROOM);
- /* Initialize the mbufs in vector, process 2 mbufs in one loop */
- for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
- __m128i vaddr0, vaddr1;
-
- mb0 = rxep[0].mbuf;
- mb1 = rxep[1].mbuf;
-
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
- /* convert pa to dma_addr hdr/data */
- dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
- dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
- /* add headroom to pa values */
- dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
- dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
- /* flush desc with pa dma_addr */
- _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
- _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
- }
-#else
-#ifdef CC_AVX512_SUPPORT
- if (avx512) {
- struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
- struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
- __m512i dma_addr0_3, dma_addr4_7;
- __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
- /* Initialize the mbufs in vector, process 8 mbufs in one loop */
- for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
- i += 8, rxep += 8, rxdp += 8) {
- __m128i vaddr0, vaddr1, vaddr2, vaddr3;
- __m128i vaddr4, vaddr5, vaddr6, vaddr7;
- __m256i vaddr0_1, vaddr2_3;
- __m256i vaddr4_5, vaddr6_7;
- __m512i vaddr0_3, vaddr4_7;
-
- mb0 = rxep[0].mbuf;
- mb1 = rxep[1].mbuf;
- mb2 = rxep[2].mbuf;
- mb3 = rxep[3].mbuf;
- mb4 = rxep[4].mbuf;
- mb5 = rxep[5].mbuf;
- mb6 = rxep[6].mbuf;
- mb7 = rxep[7].mbuf;
-
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
- vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
- vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
- vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
- vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
- vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
- vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
- /**
- * merge 0 & 1, by casting 0 to 256-bit and inserting 1
- * into the high lanes. Similarly for 2 & 3, and so on.
- */
- vaddr0_1 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
- vaddr1, 1);
- vaddr2_3 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
- vaddr3, 1);
- vaddr4_5 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
- vaddr5, 1);
- vaddr6_7 =
- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
- vaddr7, 1);
- vaddr0_3 =
- _mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
- vaddr2_3, 1);
- vaddr4_7 =
- _mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
- vaddr6_7, 1);
-
- /* convert pa to dma_addr hdr/data */
- dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
- dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
- /* add headroom to pa values */
- dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
- dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
- /* flush desc with pa dma_addr */
- _mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
- _mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
- }
- } else
-#endif
- {
- struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
- __m256i dma_addr0_1, dma_addr2_3;
- __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
- /* Initialize the mbufs in vector, process 4 mbufs in one loop */
- for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
- i += 4, rxep += 4, rxdp += 4) {
- __m128i vaddr0, vaddr1, vaddr2, vaddr3;
- __m256i vaddr0_1, vaddr2_3;
-
- mb0 = rxep[0].mbuf;
- mb1 = rxep[1].mbuf;
- mb2 = rxep[2].mbuf;
- mb3 = rxep[3].mbuf;
-
- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
- offsetof(struct rte_mbuf, buf_addr) + 8);
- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
- vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
- vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
- /*
- * merge 0 & 1, by casting 0 to 256-bit and inserting 1
- * into the high lanes. Similarly for 2 & 3
- */
- vaddr0_1 = _mm256_inserti128_si256(
- _mm256_castsi128_si256(vaddr0), vaddr1, 1);
- vaddr2_3 = _mm256_inserti128_si256(
- _mm256_castsi128_si256(vaddr2), vaddr3, 1);
-
- /* convert pa to dma_addr hdr/data */
- dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
- dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
- /* add headroom to pa values */
- dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
- dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
- /* flush desc with pa dma_addr */
- _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
- _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
- }
- }
-
-#endif
-
- rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
- if (rxq->rxrearm_start >= rxq->nb_rx_desc)
- rxq->rxrearm_start = 0;
-
- rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
- rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
- (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
- /* Update the tail pointer on the NIC */
- I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif
-
#endif
--
2.29.2
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [dpdk-dev] [PATCH 1/1] net/i40e: fix compilation failure on core-avx-i
@ 2021-07-14 1:06 Lu, Wenzhuo
2021-08-02 0:17 ` Zhang, Qi Z
0 siblings, 1 reply; 3+ messages in thread
From: Lu, Wenzhuo @ 2021-07-14 1:06 UTC (permalink / raw)
To: shaikh; +Cc: dev, stable, Xing, Beilei
Hi Shahed,
> -----Original Message-----
> From: Shahed Shaikh <shaikh@niometrics.com<mailto:shaikh@niometrics.com>>
> Sent: Tuesday, June 22, 2021 5:37 PM
> To: Xing, Beilei <beilei.xing@intel.com<mailto:beilei.xing@intel.com>>
> Cc: dev@dpdk.org<mailto:dev@dpdk.org>; Shahed Shaikh <shaikh@niometrics.com<mailto:shaikh@niometrics.com>>;
> stable@dpdk.org<mailto:stable@dpdk.org>
> Subject: [PATCH 1/1] net/i40e: fix compilation failure on core-avx-i
>
> i40e_rxtx_vec_sse.c fails to compile with below configuration:
> - #define RTE_LIBRTE_I40E_16BYTE_RX_DESC 1 in config/rte_config.h
> - cpu=core-axv-i
> - gcc which supports -mavx2 (e.g. gcc 4.8.5)
>
> This is because commit 0604b1f2208f ("net/i40e: fix crash in AVX512")
> added
> i40e_rxq_rearm_common() to i40e_rxtx_vec_common.h which is included by
> i40e_rxtx_vec_sse.c.
>
> This function is enabled for compilation if CC_AVX2_SUPPORT is defined.
> As per drivers/net/i40e/meson.build, CC_AVX2_SUPPORT is defined when
> either CPU supports __AVX2__ or compiler supports -mavx2 option.
>
> So for given configuration, CC_AVX2_SUPPORT gets defined but we don't
> pass -mavx2 explicitly to gcc while compiling i40e_rxtx_vec_sse.c.
Looks like it's better and more clear if a new specific avx head file is added.
Just want to understand more about the problem. I don't see the same problem when using 'gcc version 4.8.5 20150623 (Red Hat 4.8.5-44) (GCC)', don't understand why -mavx2 is missing.
And more, if so, why the avx2 code doesn't meet the same problem?
Thanks.
> Hence it fails due to avx2 specific code from i40e_rxq_rearm_command().
>
> This patch tries to fix the compilation by moving
> i40e_rxq_rearm_common() to a new header file which will only be
> included by i40e_rxtx_vec_avx2.c and i40e_rxtx_vec_avx512.c.
>
> Fixes: 0604b1f2208f ("net/i40e: fix crash in AVX512")
> Cc: stable@dpdk.org<mailto:stable@dpdk.org>
>
> Signed-off-by: Shahed Shaikh <shaikh@niometrics.com<mailto:shaikh@niometrics.com>>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [dpdk-dev] [PATCH 1/1] net/i40e: fix compilation failure on core-avx-i
2021-07-14 1:06 Lu, Wenzhuo
@ 2021-08-02 0:17 ` Zhang, Qi Z
0 siblings, 0 replies; 3+ messages in thread
From: Zhang, Qi Z @ 2021-08-02 0:17 UTC (permalink / raw)
To: Lu, Wenzhuo, shaikh; +Cc: dev, stable, Xing, Beilei
Hi Shahed, Could you answer Wenzhuo' s question?
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Lu, Wenzhuo
> Sent: Wednesday, July 14, 2021 9:06 AM
> To: shaikh@niometrics.com
> Cc: dev@dpdk.org; stable@dpdk.org; Xing, Beilei <beilei.xing@intel.com>
> Subject: Re: [dpdk-dev] [PATCH 1/1] net/i40e: fix compilation failure on
> core-avx-i
>
> Hi Shahed,
>
>
>
> > -----Original Message-----
>
> > From: Shahed Shaikh
> <shaikh@niometrics.com<mailto:shaikh@niometrics.com>>
>
> > Sent: Tuesday, June 22, 2021 5:37 PM
>
> > To: Xing, Beilei <beilei.xing@intel.com<mailto:beilei.xing@intel.com>>
>
> > Cc: dev@dpdk.org<mailto:dev@dpdk.org>; Shahed Shaikh
> <shaikh@niometrics.com<mailto:shaikh@niometrics.com>>;
>
> > stable@dpdk.org<mailto:stable@dpdk.org>
>
> > Subject: [PATCH 1/1] net/i40e: fix compilation failure on core-avx-i
>
> >
>
> > i40e_rxtx_vec_sse.c fails to compile with below configuration:
>
> > - #define RTE_LIBRTE_I40E_16BYTE_RX_DESC 1 in config/rte_config.h
>
> > - cpu=core-axv-i
>
> > - gcc which supports -mavx2 (e.g. gcc 4.8.5)
>
> >
>
> > This is because commit 0604b1f2208f ("net/i40e: fix crash in AVX512")
>
> > added
>
> > i40e_rxq_rearm_common() to i40e_rxtx_vec_common.h which is included by
>
> > i40e_rxtx_vec_sse.c.
>
> >
>
> > This function is enabled for compilation if CC_AVX2_SUPPORT is defined.
>
> > As per drivers/net/i40e/meson.build, CC_AVX2_SUPPORT is defined when
>
> > either CPU supports __AVX2__ or compiler supports -mavx2 option.
>
> >
>
> > So for given configuration, CC_AVX2_SUPPORT gets defined but we don't
>
> > pass -mavx2 explicitly to gcc while compiling i40e_rxtx_vec_sse.c.
>
> Looks like it's better and more clear if a new specific avx head file is added.
>
> Just want to understand more about the problem. I don't see the same
> problem when using 'gcc version 4.8.5 20150623 (Red Hat 4.8.5-44) (GCC)',
> don't understand why -mavx2 is missing.
>
> And more, if so, why the avx2 code doesn't meet the same problem?
>
> Thanks.
>
>
>
> > Hence it fails due to avx2 specific code from i40e_rxq_rearm_command().
>
> >
>
> > This patch tries to fix the compilation by moving
>
> > i40e_rxq_rearm_common() to a new header file which will only be
>
> > included by i40e_rxtx_vec_avx2.c and i40e_rxtx_vec_avx512.c.
>
> >
>
> > Fixes: 0604b1f2208f ("net/i40e: fix crash in AVX512")
>
> > Cc: stable@dpdk.org<mailto:stable@dpdk.org>
>
> >
>
> > Signed-off-by: Shahed Shaikh
> <shaikh@niometrics.com<mailto:shaikh@niometrics.com>>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2021-08-02 0:17 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-22 9:37 [dpdk-dev] [PATCH 1/1] net/i40e: fix compilation failure on core-avx-i Shahed Shaikh
2021-07-14 1:06 Lu, Wenzhuo
2021-08-02 0:17 ` Zhang, Qi Z
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).