From: Anatoly Burakov
To: dev@dpdk.org, Ian Stokes, Bruce Richardson
Subject: [PATCH v4 12/25] net/i40e: clean up definitions
Date: Fri, 30 May 2025 14:57:08 +0100
Message-ID: <345f3e05c1761f203d1a75cf52b1c45c3124fe04.1748612803.git.anatoly.burakov@intel.com>
X-Mailer: git-send-email 2.47.1

This commit does the following cleanups:

- Remove the RTE_ prefix from internal definitions
- Mark vector-PMD-related definitions with a special naming convention
- Remove unused definitions
- Create "descriptors per loop" definitions for the different vector
  implementations (regular for SSE, Neon, and AltiVec; wide for AVX2
  and AVX512)

Signed-off-by: Anatoly Burakov
---

Notes:
    v3 -> v4:
    - Add this commit
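    For reviewers, the rename map implied by the i40e_rxtx.h hunk below:

        RTE_PMD_I40E_RX_MAX_BURST   -> I40E_RX_MAX_BURST
        RTE_PMD_I40E_TX_MAX_BURST   -> I40E_TX_MAX_BURST
        RTE_I40E_VPMD_RX_BURST      -> I40E_VPMD_RX_BURST
        RTE_I40E_RXQ_REARM_THRESH   -> I40E_VPMD_RXQ_REARM_THRESH
        RTE_I40E_TX_MAX_FREE_BUF_SZ -> I40E_TX_MAX_FREE_BUF_SZ
        RTE_I40E_DESCS_PER_LOOP     -> I40E_VPMD_DESCS_PER_LOOP
        RTE_I40E_DESCS_PER_LOOP_AVX -> I40E_VPMD_DESCS_PER_LOOP_WIDE
                                       (was a local define in the AVX2/AVX512 files)
        RTE_I40E_VPMD_TX_BURST, RTE_I40E_MAX_RX_BURST: removed as unused

    A minimal usage sketch of the two "descriptors per loop" constants
    (illustrative only; process_descs() is a hypothetical helper, not
    part of this patch -- the alignment idiom itself is taken from the
    Rx routines changed below):

        /* 128-bit paths (SSE, Neon, AltiVec) process 4 descriptors per
         * iteration; the burst is floor-aligned to that count first.
         */
        nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, I40E_VPMD_DESCS_PER_LOOP);
        for (pos = 0; pos < nb_pkts; pos += I40E_VPMD_DESCS_PER_LOOP)
                process_descs(&rxdp[pos], I40E_VPMD_DESCS_PER_LOOP);

        /* 256/512-bit paths (AVX2, AVX512) process 8 descriptors per
         * iteration and align to the wide count instead.
         */
        nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, I40E_VPMD_DESCS_PER_LOOP_WIDE);
        for (pos = 0; pos < nb_pkts; pos += I40E_VPMD_DESCS_PER_LOOP_WIDE)
                process_descs(&rxdp[pos], I40E_VPMD_DESCS_PER_LOOP_WIDE);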
 drivers/net/intel/i40e/i40e_rxtx.c            | 42 +++++++--------
 drivers/net/intel/i40e/i40e_rxtx.h            | 17 +++---
 drivers/net/intel/i40e/i40e_rxtx_common_avx.h | 18 +++----
 .../net/intel/i40e/i40e_rxtx_vec_altivec.c    | 48 ++++++++---------
 drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   | 32 ++++++-----
 drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c | 32 ++++++-----
 drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   | 53 +++++++++----------
 drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    | 48 ++++++++---------
 8 files changed, 142 insertions(+), 148 deletions(-)

diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index 5f54bcc225..2e61076378 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -424,11 +424,11 @@ check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq)
 	int ret = 0;
 
 #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
-	if (!(rxq->rx_free_thresh >= RTE_PMD_I40E_RX_MAX_BURST)) {
+	if (!(rxq->rx_free_thresh >= I40E_RX_MAX_BURST)) {
 		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
 			     "rxq->rx_free_thresh=%d, "
-			     "RTE_PMD_I40E_RX_MAX_BURST=%d",
-			     rxq->rx_free_thresh, RTE_PMD_I40E_RX_MAX_BURST);
+			     "I40E_RX_MAX_BURST=%d",
+			     rxq->rx_free_thresh, I40E_RX_MAX_BURST);
 		ret = -EINVAL;
 	} else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
 		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
@@ -484,7 +484,7 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
 	 * Scan LOOK_AHEAD descriptors at a time to determine which
 	 * descriptors reference packets that are ready to be received.
 	 */
-	for (i = 0; i < RTE_PMD_I40E_RX_MAX_BURST; i+=I40E_LOOK_AHEAD,
+	for (i = 0; i < I40E_RX_MAX_BURST; i += I40E_LOOK_AHEAD,
 			rxdp += I40E_LOOK_AHEAD, rxep += I40E_LOOK_AHEAD) {
 		/* Read desc statuses backwards to avoid race condition */
 		for (j = I40E_LOOK_AHEAD - 1; j >= 0; j--) {
@@ -680,11 +680,11 @@ i40e_recv_pkts_bulk_alloc(void *rx_queue,
 	if (unlikely(nb_pkts == 0))
 		return 0;
 
-	if (likely(nb_pkts <= RTE_PMD_I40E_RX_MAX_BURST))
+	if (likely(nb_pkts <= I40E_RX_MAX_BURST))
 		return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
 
 	while (nb_pkts) {
-		n = RTE_MIN(nb_pkts, RTE_PMD_I40E_RX_MAX_BURST);
+		n = RTE_MIN(nb_pkts, I40E_RX_MAX_BURST);
 		count = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
 		nb_rx = (uint16_t)(nb_rx + count);
 		nb_pkts = (uint16_t)(nb_pkts - count);
@@ -1334,9 +1334,9 @@ i40e_tx_free_bufs(struct ci_tx_queue *txq)
 	struct ci_tx_entry *txep;
 	uint16_t tx_rs_thresh = txq->tx_rs_thresh;
 	uint16_t i = 0, j = 0;
-	struct rte_mbuf *free[RTE_I40E_TX_MAX_FREE_BUF_SZ];
-	const uint16_t k = RTE_ALIGN_FLOOR(tx_rs_thresh, RTE_I40E_TX_MAX_FREE_BUF_SZ);
-	const uint16_t m = tx_rs_thresh % RTE_I40E_TX_MAX_FREE_BUF_SZ;
+	struct rte_mbuf *free[I40E_TX_MAX_FREE_BUF_SZ];
+	const uint16_t k = RTE_ALIGN_FLOOR(tx_rs_thresh, I40E_TX_MAX_FREE_BUF_SZ);
+	const uint16_t m = tx_rs_thresh % I40E_TX_MAX_FREE_BUF_SZ;
 
 	if ((txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
 			rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
@@ -1350,13 +1350,13 @@ i40e_tx_free_bufs(struct ci_tx_queue *txq)
 
 	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
 		if (k) {
-			for (j = 0; j != k; j += RTE_I40E_TX_MAX_FREE_BUF_SZ) {
-				for (i = 0; i < RTE_I40E_TX_MAX_FREE_BUF_SZ; ++i, ++txep) {
+			for (j = 0; j != k; j += I40E_TX_MAX_FREE_BUF_SZ) {
+				for (i = 0; i < I40E_TX_MAX_FREE_BUF_SZ; ++i, ++txep) {
 					free[i] = txep->mbuf;
 					txep->mbuf = NULL;
 				}
 				rte_mempool_put_bulk(free[0]->pool, (void **)free,
-						RTE_I40E_TX_MAX_FREE_BUF_SZ);
+						I40E_TX_MAX_FREE_BUF_SZ);
 			}
 		}
 
@@ -2146,7 +2146,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	 * Allocating a little more memory because vectorized/bulk_alloc Rx
 	 * functions doesn't check boundaries each time.
 	 */
-	len += RTE_PMD_I40E_RX_MAX_BURST;
+	len += I40E_RX_MAX_BURST;
 
 	ring_size = RTE_ALIGN(len * sizeof(union i40e_rx_desc),
 			      I40E_DMA_MEM_ALIGN);
@@ -2166,7 +2166,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	rxq->rx_ring_phys_addr = rz->iova;
 	rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
 
-	len = (uint16_t)(nb_desc + RTE_PMD_I40E_RX_MAX_BURST);
+	len = (uint16_t)(nb_desc + I40E_RX_MAX_BURST);
 
 	/* Allocate the software ring. */
 	rxq->sw_ring =
@@ -2370,7 +2370,7 @@ i40e_dev_tx_queue_setup_runtime(struct rte_eth_dev *dev,
 	/* check vector conflict */
 	if (ad->tx_vec_allowed) {
-		if (txq->tx_rs_thresh > RTE_I40E_TX_MAX_FREE_BUF_SZ ||
+		if (txq->tx_rs_thresh > I40E_TX_MAX_FREE_BUF_SZ ||
 		    i40e_txq_vec_setup(txq)) {
 			PMD_DRV_LOG(ERR, "Failed vector tx setup.");
 			return -EINVAL;
@@ -2379,7 +2379,7 @@
 	/* check simple tx conflict */
 	if (ad->tx_simple_allowed) {
 		if ((txq->offloads & ~RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) != 0 ||
-				txq->tx_rs_thresh < RTE_PMD_I40E_TX_MAX_BURST) {
+				txq->tx_rs_thresh < I40E_TX_MAX_BURST) {
 			PMD_DRV_LOG(ERR, "No-simple tx is required.");
 			return -EINVAL;
 		}
@@ -2675,7 +2675,7 @@ i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
 
 #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
 	if (check_rx_burst_bulk_alloc_preconditions(rxq) == 0)
-		len = (uint16_t)(rxq->nb_rx_desc + RTE_PMD_I40E_RX_MAX_BURST);
+		len = (uint16_t)(rxq->nb_rx_desc + I40E_RX_MAX_BURST);
 	else
 #endif /* RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC */
 		len = rxq->nb_rx_desc;
@@ -2684,7 +2684,7 @@ i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
 		((volatile char *)rxq->rx_ring)[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
-	for (i = 0; i < RTE_PMD_I40E_RX_MAX_BURST; ++i)
+	for (i = 0; i < I40E_RX_MAX_BURST; ++i)
 		rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;
 
 #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
@@ -3276,7 +3276,7 @@ i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	recycle_rxq_info->receive_tail = &rxq->rx_tail;
 
 	if (ad->rx_vec_allowed) {
-		recycle_rxq_info->refill_requirement = RTE_I40E_RXQ_REARM_THRESH;
+		recycle_rxq_info->refill_requirement = I40E_VPMD_RXQ_REARM_THRESH;
 		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
 	} else {
 		recycle_rxq_info->refill_requirement = rxq->rx_free_thresh;
@@ -3501,9 +3501,9 @@ i40e_set_tx_function_flag(struct rte_eth_dev *dev, struct ci_tx_queue *txq)
 	ad->tx_simple_allowed =
 		(txq->offloads ==
 		 (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) &&
-		 txq->tx_rs_thresh >= RTE_PMD_I40E_TX_MAX_BURST);
+		 txq->tx_rs_thresh >= I40E_TX_MAX_BURST);
 	ad->tx_vec_allowed = (ad->tx_simple_allowed &&
-			txq->tx_rs_thresh <= RTE_I40E_TX_MAX_FREE_BUF_SZ);
+			txq->tx_rs_thresh <= I40E_TX_MAX_FREE_BUF_SZ);
 
 	if (ad->tx_vec_allowed)
 		PMD_INIT_LOG(DEBUG, "Vector Tx can be enabled on Tx queue %u.",
diff --git a/drivers/net/intel/i40e/i40e_rxtx.h b/drivers/net/intel/i40e/i40e_rxtx.h
index 568f0536ac..3dca32b1ba 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx.h
@@ -7,15 +7,14 @@
 
 #include "../common/tx.h"
 
-#define RTE_PMD_I40E_RX_MAX_BURST 32
-#define RTE_PMD_I40E_TX_MAX_BURST 32
+#define I40E_RX_MAX_BURST 32
+#define I40E_TX_MAX_BURST 32
 
-#define RTE_I40E_VPMD_RX_BURST 32
-#define RTE_I40E_VPMD_TX_BURST 32
-#define RTE_I40E_RXQ_REARM_THRESH 32
-#define RTE_I40E_MAX_RX_BURST RTE_I40E_RXQ_REARM_THRESH
-#define RTE_I40E_TX_MAX_FREE_BUF_SZ 64
-#define RTE_I40E_DESCS_PER_LOOP 4
+#define I40E_VPMD_RX_BURST 32
+#define I40E_VPMD_RXQ_REARM_THRESH 32
+#define I40E_TX_MAX_FREE_BUF_SZ 64
+#define I40E_VPMD_DESCS_PER_LOOP 4
+#define I40E_VPMD_DESCS_PER_LOOP_WIDE 8
 
 #define I40E_RXBUF_SZ_1024 1024
 #define I40E_RXBUF_SZ_2048 2048
@@ -97,7 +96,7 @@ struct i40e_rx_queue {
 	uint16_t rx_nb_avail; /**< number of staged packets ready */
 	uint16_t rx_next_avail; /**< index of next staged packets */
 	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	struct rte_mbuf *rx_stage[RTE_PMD_I40E_RX_MAX_BURST * 2];
+	struct rte_mbuf *rx_stage[I40E_RX_MAX_BURST * 2];
 #endif
 	uint16_t rxrearm_nb;	/**< number of remaining to be re-armed */
diff --git a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
index 7d2bda624b..8fc7cd5bd4 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
@@ -25,19 +25,19 @@ i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
 				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+				 I40E_VPMD_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + I40E_VPMD_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
 			__m128i dma_addr0;
 			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+			for (i = 0; i < I40E_VPMD_DESCS_PER_LOOP; i++) {
 				rxep[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
 		}
 		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
+			I40E_VPMD_RXQ_REARM_THRESH;
 		return;
 	}
@@ -47,7 +47,7 @@ i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+	for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		__m128i vaddr0, vaddr1;
 
 		mb0 = rxep[0].mbuf;
@@ -79,7 +79,7 @@ i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
 	__m512i dma_addr0_3, dma_addr4_7;
 	__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
 	/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+	for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH;
 			i += 8, rxep += 8, rxdp += 8) {
 		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
 		__m128i vaddr4, vaddr5, vaddr6, vaddr7;
@@ -152,7 +152,7 @@ i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
 	__m256i dma_addr0_1, dma_addr2_3;
 	__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
 	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+	for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH;
 			i += 4, rxep += 4, rxdp += 4) {
 		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
 		__m256i vaddr0_1, vaddr2_3;
@@ -197,7 +197,7 @@ i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
 
 #endif
 
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += I40E_VPMD_RXQ_REARM_THRESH;
 	rx_id = rxq->rxrearm_start - 1;
 
 	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
@@ -205,7 +205,7 @@ i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
 		rx_id = rxq->nb_rx_desc - 1;
 	}
 
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= I40E_VPMD_RXQ_REARM_THRESH;
 
 	/* Update the tail pointer on the NIC */
 	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
index 01dee811ba..568891cfb2 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
@@ -35,23 +35,23 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
 				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+				 I40E_VPMD_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + I40E_VPMD_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
 			dma_addr0 = (__vector unsigned long){};
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+			for (i = 0; i < I40E_VPMD_DESCS_PER_LOOP; i++) {
 				rxep[i].mbuf = &rxq->fake_mbuf;
 				vec_st(dma_addr0, 0,
 				       RTE_CAST_PTR(__vector unsigned long *, &rxdp[i].read));
 			}
 		}
 		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
+			I40E_VPMD_RXQ_REARM_THRESH;
 		return;
 	}
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+	for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		__vector unsigned long vaddr0, vaddr1;
 		uintptr_t p0, p1;
@@ -86,7 +86,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 		vec_st(dma_addr1, 0,
 		       RTE_CAST_PTR(__vector unsigned long *, &rxdp++->read));
 	}
 
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += I40E_VPMD_RXQ_REARM_THRESH;
 	rx_id = rxq->rxrearm_start - 1;
 
 	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
@@ -94,7 +94,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 		rx_id = rxq->nb_rx_desc - 1;
 	}
 
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= I40E_VPMD_RXQ_REARM_THRESH;
 
 	/* Update the tail pointer on the NIC */
 	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
@@ -188,11 +188,11 @@ desc_to_ptype_v(__vector unsigned long descs[4], struct rte_mbuf **rx_pkts,
 }
 
 /**
- * vPMD raw receive routine, only accept(nb_pkts >= RTE_I40E_DESCS_PER_LOOP)
+ * vPMD raw receive routine, only accept(nb_pkts >= I40E_VPMD_DESCS_PER_LOOP)
  *
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
- * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
+ * - floor align nb_pkts to a I40E_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
 _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -215,8 +215,8 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	};
 	__vector unsigned long dd_check, eop_check;
 
-	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP);
+	/* nb_pkts has to be floor-aligned to I40E_VPMD_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, I40E_VPMD_DESCS_PER_LOOP);
 
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
@@ -228,7 +228,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > I40E_VPMD_RXQ_REARM_THRESH)
 		i40e_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -271,9 +271,9 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	 */
 	for (pos = 0, nb_pkts_recd = 0;
 	     pos < nb_pkts;
-	     pos += RTE_I40E_DESCS_PER_LOOP,
-	     rxdp += RTE_I40E_DESCS_PER_LOOP) {
-		__vector unsigned long descs[RTE_I40E_DESCS_PER_LOOP];
+	     pos += I40E_VPMD_DESCS_PER_LOOP,
+	     rxdp += I40E_VPMD_DESCS_PER_LOOP) {
+		__vector unsigned long descs[I40E_VPMD_DESCS_PER_LOOP];
 		__vector unsigned char pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
 		__vector unsigned short staterr, sterr_tmp1, sterr_tmp2;
 		__vector unsigned long mbp1, mbp2; /* two mbuf pointer
@@ -406,7 +406,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			/* store the resulting 32-bit value */
 			*split_packet = (vec_ld(0,
 					 (__vector unsigned int *)&eop_bits))[0];
-			split_packet += RTE_I40E_DESCS_PER_LOOP;
+			split_packet += I40E_VPMD_DESCS_PER_LOOP;
 
 			/* zero-out next pointers */
 			rx_pkts[pos]->next = NULL;
@@ -433,7 +433,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		var = rte_popcount64((vec_ld(0,
 			(__vector unsigned long *)&staterr)[0]));
 		nb_pkts_recd += var;
-		if (likely(var != RTE_I40E_DESCS_PER_LOOP))
+		if (likely(var != I40E_VPMD_DESCS_PER_LOOP))
 			break;
 	}
@@ -446,7 +446,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 }
 
 /* Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -459,14 +459,14 @@ i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  *
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
 	struct i40e_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
+	uint8_t split_flags[I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
@@ -505,15 +505,15 @@ i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > RTE_I40E_VPMD_RX_BURST) {
+	while (nb_pkts > I40E_VPMD_RX_BURST) {
 		uint16_t burst;
 
 		burst = i40e_recv_scattered_burst_vec(rx_queue,
 						      rx_pkts + retval,
-						      RTE_I40E_VPMD_RX_BURST);
+						      I40E_VPMD_RX_BURST);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < RTE_I40E_VPMD_RX_BURST)
+		if (burst < I40E_VPMD_RX_BURST)
 			return retval;
 	}
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index 4469c73c56..a13dd9bc78 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -108,8 +108,6 @@ static __rte_always_inline uint16_t
 _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts, uint8_t *split_packet)
 {
-#define RTE_I40E_DESCS_PER_LOOP_AVX 8
-
 	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0, 0,
 			rxq->mbuf_initializer);
@@ -118,13 +116,13 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 
 	rte_prefetch0(rxdp);
 
-	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP_AVX */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP_AVX);
+	/* nb_pkts has to be floor-aligned to I40E_VPMD_DESCS_PER_LOOP_WIDE */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, I40E_VPMD_DESCS_PER_LOOP_WIDE);
 
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > I40E_VPMD_RXQ_REARM_THRESH)
 		i40e_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -262,8 +260,8 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	uint16_t i, received;
 
 	for (i = 0, received = 0; i < nb_pkts;
-			i += RTE_I40E_DESCS_PER_LOOP_AVX,
-			rxdp += RTE_I40E_DESCS_PER_LOOP_AVX) {
+			i += I40E_VPMD_DESCS_PER_LOOP_WIDE,
+			rxdp += I40E_VPMD_DESCS_PER_LOOP_WIDE) {
 		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
 		_mm256_storeu_si256((void *)&rx_pkts[i],
 				_mm256_loadu_si256((void *)&sw_ring[i]));
@@ -299,7 +297,7 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		if (split_packet) {
 			int j;
 
-			for (j = 0; j < RTE_I40E_DESCS_PER_LOOP_AVX; j++)
+			for (j = 0; j < I40E_VPMD_DESCS_PER_LOOP_WIDE; j++)
 				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
 		}
@@ -577,7 +575,7 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 					12, 4, 14, 6);
 			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
 			*(uint64_t *)split_packet = _mm_cvtsi128_si64(split_bits);
-			split_packet += RTE_I40E_DESCS_PER_LOOP_AVX;
+			split_packet += I40E_VPMD_DESCS_PER_LOOP_WIDE;
 		}
 
 		/* perform dd_check */
@@ -590,7 +588,7 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		burst += rte_popcount64(_mm_cvtsi128_si64(
 				_mm256_castsi256_si128(status0_7)));
 		received += burst;
-		if (burst != RTE_I40E_DESCS_PER_LOOP_AVX)
+		if (burst != I40E_VPMD_DESCS_PER_LOOP_WIDE)
 			break;
 	}
@@ -607,7 +605,7 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 /*
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 i40e_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -619,14 +617,14 @@ i40e_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 /*
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 i40e_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
 	struct i40e_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
+	uint8_t split_flags[I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _recv_raw_pkts_vec_avx2(rxq, rx_pkts, nb_pkts,
@@ -661,19 +659,19 @@ i40e_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
  * vPMD receive routine that reassembles scattered packets.
 * Main receive routine that can handle arbitrary burst sizes
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 i40e_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > RTE_I40E_VPMD_RX_BURST) {
+	while (nb_pkts > I40E_VPMD_RX_BURST) {
 		uint16_t burst = i40e_recv_scattered_burst_vec_avx2(rx_queue,
-				rx_pkts + retval, RTE_I40E_VPMD_RX_BURST);
+				rx_pkts + retval, I40E_VPMD_RX_BURST);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < RTE_I40E_VPMD_RX_BURST)
+		if (burst < I40E_VPMD_RX_BURST)
 			return retval;
 	}
 	return retval + i40e_recv_scattered_burst_vec_avx2(rx_queue,
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index bb25acf398..f0320a221c 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -15,8 +15,6 @@
 
 #include
 
-#define RTE_I40E_DESCS_PER_LOOP_AVX 8
-
 static __rte_always_inline void
 i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 {
@@ -119,13 +117,13 @@ _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 	rte_prefetch0(rxdp);
 
-	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP_AVX */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP_AVX);
+	/* nb_pkts has to be floor-aligned to I40E_VPMD_DESCS_PER_LOOP_WIDE */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, I40E_VPMD_DESCS_PER_LOOP_WIDE);
 
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > I40E_VPMD_RXQ_REARM_THRESH)
 		i40e_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -245,8 +243,8 @@ _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	uint16_t i, received;
 
 	for (i = 0, received = 0; i < nb_pkts;
-			i += RTE_I40E_DESCS_PER_LOOP_AVX,
-			rxdp += RTE_I40E_DESCS_PER_LOOP_AVX) {
+			i += I40E_VPMD_DESCS_PER_LOOP_WIDE,
+			rxdp += I40E_VPMD_DESCS_PER_LOOP_WIDE) {
 		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
 		_mm256_storeu_si256((void *)&rx_pkts[i],
 				_mm256_loadu_si256((void *)&sw_ring[i]));
@@ -312,7 +310,7 @@ _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		if (split_packet) {
 			int j;
 
-			for (j = 0; j < RTE_I40E_DESCS_PER_LOOP_AVX; j++)
+			for (j = 0; j < I40E_VPMD_DESCS_PER_LOOP_WIDE; j++)
 				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
 		}
@@ -642,7 +640,7 @@ _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
 			*(uint64_t *)split_packet = _mm_cvtsi128_si64(split_bits);
-			split_packet += RTE_I40E_DESCS_PER_LOOP_AVX;
+			split_packet += I40E_VPMD_DESCS_PER_LOOP_WIDE;
 		}
 
 		/* perform dd_check */
@@ -657,7 +655,7 @@ _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		burst += rte_popcount64(_mm_cvtsi128_si64
 					(_mm256_castsi256_si128(status0_7)));
 		received += burst;
-		if (burst != RTE_I40E_DESCS_PER_LOOP_AVX)
+		if (burst != I40E_VPMD_DESCS_PER_LOOP_WIDE)
 			break;
 	}
@@ -674,7 +672,7 @@ _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 /**
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 i40e_recv_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -686,7 +684,7 @@ i40e_recv_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 /**
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 i40e_recv_scattered_burst_vec_avx512(void *rx_queue,
@@ -694,7 +692,7 @@ i40e_recv_scattered_burst_vec_avx512(void *rx_queue,
 			uint16_t nb_pkts)
 {
 	struct i40e_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
+	uint8_t split_flags[I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _recv_raw_pkts_vec_avx512(rxq, rx_pkts, nb_pkts,
@@ -729,7 +727,7 @@ i40e_recv_scattered_burst_vec_avx512(void *rx_queue,
  * vPMD receive routine that reassembles scattered packets.
  * Main receive routine that can handle arbitrary burst sizes
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 i40e_recv_scattered_pkts_vec_avx512(void *rx_queue,
@@ -738,12 +736,12 @@ i40e_recv_scattered_pkts_vec_avx512(void *rx_queue,
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > RTE_I40E_VPMD_RX_BURST) {
+	while (nb_pkts > I40E_VPMD_RX_BURST) {
 		uint16_t burst = i40e_recv_scattered_burst_vec_avx512(rx_queue,
-				rx_pkts + retval, RTE_I40E_VPMD_RX_BURST);
+				rx_pkts + retval, I40E_VPMD_RX_BURST);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < RTE_I40E_VPMD_RX_BURST)
+		if (burst < I40E_VPMD_RX_BURST)
 			return retval;
 	}
 	return retval + i40e_recv_scattered_burst_vec_avx512(rx_queue,
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index 695b4e1040..955382652c 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -33,21 +33,21 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (unlikely(rte_mempool_get_bulk(rxq->mp,
 					  (void *)rxep,
-					  RTE_I40E_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+					  I40E_VPMD_RXQ_REARM_THRESH) < 0)) {
+		if (rxq->rxrearm_nb + I40E_VPMD_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+			for (i = 0; i < I40E_VPMD_DESCS_PER_LOOP; i++) {
 				rxep[i].mbuf = &rxq->fake_mbuf;
 				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read), zero);
 			}
 		}
 		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
+			I40E_VPMD_RXQ_REARM_THRESH;
 		return;
 	}
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+	for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		mb0 = rxep[0].mbuf;
 		mb1 = rxep[1].mbuf;
@@ -62,7 +62,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
 	}
 
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += I40E_VPMD_RXQ_REARM_THRESH;
 	rx_id = rxq->rxrearm_start - 1;
 
 	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
@@ -70,7 +70,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 		rx_id = rxq->nb_rx_desc - 1;
 	}
 
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= I40E_VPMD_RXQ_REARM_THRESH;
 
 	rte_io_wmb();
 	/* Update the tail pointer on the NIC */
@@ -325,11 +325,11 @@ desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **__rte_restrict rx_pkts,
 }
 
 /**
- * vPMD raw receive routine, only accept(nb_pkts >= RTE_I40E_DESCS_PER_LOOP)
+ * vPMD raw receive routine, only accept(nb_pkts >= I40E_VPMD_DESCS_PER_LOOP)
  *
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
- * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
+ * - floor align nb_pkts to a I40E_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
 _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
@@ -368,8 +368,8 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 		0, 0, 0 /* ignore non-length fields */
 	};
 
-	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP);
+	/* nb_pkts has to be floor-aligned to I40E_VPMD_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, I40E_VPMD_DESCS_PER_LOOP);
 
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
@@ -381,7 +381,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > I40E_VPMD_RXQ_REARM_THRESH)
 		i40e_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -405,9 +405,9 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 	 */
 	for (pos = 0, nb_pkts_recd = 0;
 	     pos < nb_pkts;
-	     pos += RTE_I40E_DESCS_PER_LOOP,
-	     rxdp += RTE_I40E_DESCS_PER_LOOP) {
-		uint64x2_t descs[RTE_I40E_DESCS_PER_LOOP];
+	     pos += I40E_VPMD_DESCS_PER_LOOP,
+	     rxdp += I40E_VPMD_DESCS_PER_LOOP) {
+		uint64x2_t descs[I40E_VPMD_DESCS_PER_LOOP];
 		uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
 		uint16x8x2_t sterr_tmp1, sterr_tmp2;
 		uint64x2_t mbp1, mbp2;
@@ -502,9 +502,8 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 
 		desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
 
-		if (likely(pos + RTE_I40E_DESCS_PER_LOOP < nb_pkts)) {
-			rte_prefetch_non_temporal(rxdp + RTE_I40E_DESCS_PER_LOOP);
-		}
+		if (likely(pos + I40E_VPMD_DESCS_PER_LOOP < nb_pkts))
+			rte_prefetch_non_temporal(rxdp + I40E_VPMD_DESCS_PER_LOOP);
 
 		/* C.1 4=>2 filter staterr info only */
 		sterr_tmp2 = vzipq_u16(vreinterpretq_u16_u64(descs[1]),
@@ -538,7 +537,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 			/* store the resulting 32-bit value */
 			vst1q_lane_u32((uint32_t *)split_packet,
 				       vreinterpretq_u32_u8(eop_bits), 0);
-			split_packet += RTE_I40E_DESCS_PER_LOOP;
+			split_packet += I40E_VPMD_DESCS_PER_LOOP;
 
 			/* zero-out next pointers */
 			rx_pkts[pos]->next = NULL;
@@ -555,7 +554,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 
 		/* C.4 calc available number of desc */
 		if (unlikely(stat == 0)) {
-			nb_pkts_recd += RTE_I40E_DESCS_PER_LOOP;
+			nb_pkts_recd += I40E_VPMD_DESCS_PER_LOOP;
 		} else {
 			nb_pkts_recd += rte_ctz64(stat) / I40E_UINT16_BIT;
 			break;
@@ -572,7 +571,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 
 /*
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 i40e_recv_pkts_vec(void *__rte_restrict rx_queue,
@@ -585,7 +584,7 @@ i40e_recv_pkts_vec(void *__rte_restrict rx_queue,
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  *
 * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -593,7 +592,7 @@ i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 {
 	struct i40e_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
+	uint8_t split_flags[I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
@@ -633,15 +632,15 @@ i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > RTE_I40E_VPMD_RX_BURST) {
+	while (nb_pkts > I40E_VPMD_RX_BURST) {
 		uint16_t burst;
 
 		burst = i40e_recv_scattered_burst_vec(rx_queue,
 						      rx_pkts + retval,
-						      RTE_I40E_VPMD_RX_BURST);
+						      I40E_VPMD_RX_BURST);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < RTE_I40E_VPMD_RX_BURST)
+		if (burst < I40E_VPMD_RX_BURST)
 			return retval;
 	}
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 920089fe3e..7e7f4c0895 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -31,23 +31,23 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
 				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+				 I40E_VPMD_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + I40E_VPMD_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
 			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+			for (i = 0; i < I40E_VPMD_DESCS_PER_LOOP; i++) {
 				rxep[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
 		}
 		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
+			I40E_VPMD_RXQ_REARM_THRESH;
 		return;
 	}
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+	for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		__m128i vaddr0, vaddr1;
 
 		mb0 = rxep[0].mbuf;
@@ -72,7 +72,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
 	}
 
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += I40E_VPMD_RXQ_REARM_THRESH;
 	rx_id = rxq->rxrearm_start - 1;
 
 	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
@@ -80,7 +80,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 		rx_id = rxq->nb_rx_desc - 1;
 	}
 
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= I40E_VPMD_RXQ_REARM_THRESH;
 
 	/* Update the tail pointer on the NIC */
 	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
@@ -340,11 +340,11 @@ desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
 }
 
 /**
- * vPMD raw receive routine, only accept(nb_pkts >= RTE_I40E_DESCS_PER_LOOP)
+ * vPMD raw receive routine, only accept(nb_pkts >= I40E_VPMD_DESCS_PER_LOOP)
  *
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
- * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
+ * - floor align nb_pkts to a I40E_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
 _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -376,8 +376,8 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
 	__m128i dd_check, eop_check;
 
-	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP);
+	/* nb_pkts has to be floor-aligned to I40E_VPMD_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, I40E_VPMD_DESCS_PER_LOOP);
 
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
@@ -389,7 +389,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > I40E_VPMD_RXQ_REARM_THRESH)
 		i40e_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -443,9 +443,9 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	 */
 	for (pos = 0, nb_pkts_recd = 0;
 	     pos < nb_pkts;
-	     pos += RTE_I40E_DESCS_PER_LOOP,
-	     rxdp += RTE_I40E_DESCS_PER_LOOP) {
-		__m128i descs[RTE_I40E_DESCS_PER_LOOP];
+	     pos += I40E_VPMD_DESCS_PER_LOOP,
+	     rxdp += I40E_VPMD_DESCS_PER_LOOP) {
+		__m128i descs[I40E_VPMD_DESCS_PER_LOOP];
 		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
 		__m128i zero, staterr, sterr_tmp1, sterr_tmp2;
 		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
@@ -559,7 +559,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
 			/* store the resulting 32-bit value */
 			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
-			split_packet += RTE_I40E_DESCS_PER_LOOP;
+			split_packet += I40E_VPMD_DESCS_PER_LOOP;
 		}
 
 		/* C.3 calc available number of desc */
@@ -575,7 +575,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		/* C.4 calc available number of desc */
 		var = rte_popcount64(_mm_cvtsi128_si64(staterr));
 		nb_pkts_recd += var;
-		if (likely(var != RTE_I40E_DESCS_PER_LOOP))
+		if (likely(var != I40E_VPMD_DESCS_PER_LOOP))
 			break;
 	}
@@ -589,7 +589,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 /*
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -602,7 +602,7 @@ i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  *
 * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -610,7 +610,7 @@ i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 {
 	struct i40e_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
+	uint8_t split_flags[I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
@@ -650,15 +650,15 @@ i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > RTE_I40E_VPMD_RX_BURST) {
+	while (nb_pkts > I40E_VPMD_RX_BURST) {
 		uint16_t burst;
 
 		burst = i40e_recv_scattered_burst_vec(rx_queue,
 						      rx_pkts + retval,
-						      RTE_I40E_VPMD_RX_BURST);
+						      I40E_VPMD_RX_BURST);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < RTE_I40E_VPMD_RX_BURST)
+		if (burst < I40E_VPMD_RX_BURST)
 			return retval;
 	}
-- 
2.47.1