DPDK patches and discussions
* [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct
@ 2025-05-06 13:27 Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
                   ` (14 more replies)
  0 siblings, 15 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin; +Cc: bruce.richardson

The `rdh` (receive descriptor head) field in the `ixgbe_rx_queue` struct is
not used anywhere in the codebase and can be removed.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/ixgbe/ixgbe_rxtx.c | 9 ++-------
 drivers/net/intel/ixgbe/ixgbe_rxtx.h | 1 -
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index 95c80ac1b8..0c07ce3186 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -3296,17 +3296,12 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	/*
 	 * Modified to setup VFRDT for Virtual Function
 	 */
-	if (ixgbe_is_vf(dev)) {
+	if (ixgbe_is_vf(dev))
 		rxq->rdt_reg_addr =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
-		rxq->rdh_reg_addr =
-			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
-	} else {
+	else
 		rxq->rdt_reg_addr =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
-		rxq->rdh_reg_addr =
-			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
-	}
 
 	rxq->rx_ring_phys_addr = rz->iova;
 	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index 641f982b01..20a5c5a0af 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -85,7 +85,6 @@ struct ixgbe_rx_queue {
 	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
 	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
 	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
-	volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
 	struct ixgbe_rx_entry *sw_ring; /**< address of RX software ring. */
 	struct ixgbe_scattered_rx_entry *sw_sc_ring; /**< address of scattered Rx software ring. */
 	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
-- 
2.47.1


* [PATCH v1 02/13] net/iavf: make IPsec stats dynamically allocated
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
                   ` (13 subsequent siblings)
  14 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin, Ian Stokes; +Cc: bruce.richardson

Currently, the stats structure is directly embedded in the queue structure.
We're about to move the iavf driver to a common Rx queue structure, and the
common structure cannot embed driver-specific structures by value; it can
only hold pointers to them. To prepare, replace the embedded stats structure
with a pointer to a dynamically allocated one.
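
As background, a common header can only forward-declare driver-specific
types, so it can hold a pointer to such an incomplete type but cannot embed
it by value. A minimal illustrative sketch (the struct name here is made up,
not code from the tree):

    /* common header: the driver-specific type is only forward-declared */
    struct iavf_rx_queue_stats;

    struct example_common_rxq {
        struct iavf_rx_queue_stats *stats;  /* OK: pointer to incomplete type */
        /* struct iavf_rx_queue_stats stats;   would not compile: size unknown */
    };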

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/iavf/iavf_ethdev.c |  2 +-
 drivers/net/intel/iavf/iavf_rxtx.c   | 21 ++++++++++++++++++---
 drivers/net/intel/iavf/iavf_rxtx.h   |  2 +-
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/net/intel/iavf/iavf_ethdev.c b/drivers/net/intel/iavf/iavf_ethdev.c
index b3dacbef84..5babd587b3 100644
--- a/drivers/net/intel/iavf/iavf_ethdev.c
+++ b/drivers/net/intel/iavf/iavf_ethdev.c
@@ -1870,7 +1870,7 @@ iavf_dev_update_ipsec_xstats(struct rte_eth_dev *ethdev,
 		struct iavf_rx_queue *rxq;
 		struct iavf_ipsec_crypto_stats *stats;
 		rxq = (struct iavf_rx_queue *)ethdev->data->rx_queues[idx];
-		stats = &rxq->stats.ipsec_crypto;
+		stats = &rxq->stats->ipsec_crypto;
 		ips->icount += stats->icount;
 		ips->ibytes += stats->ibytes;
 		ips->ierrors.count += stats->ierrors.count;
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index 5411eb6897..d23d2df807 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -619,6 +619,18 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 		return -ENOMEM;
 	}
 
+	/* Allocate stats */
+	rxq->stats = rte_zmalloc_socket("iavf rxq stats",
+				 sizeof(struct iavf_rx_queue_stats),
+				 RTE_CACHE_LINE_SIZE,
+				 socket_id);
+	if (!rxq->stats) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for "
+			     "rx queue stats");
+		rte_free(rxq);
+		return -ENOMEM;
+	}
+
 	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
 		proto_xtr = vf->proto_xtr ? vf->proto_xtr[queue_idx] :
 				IAVF_PROTO_XTR_NONE;
@@ -677,6 +689,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 				   socket_id);
 	if (!rxq->sw_ring) {
 		PMD_INIT_LOG(ERR, "Failed to allocate memory for SW ring");
+		rte_free(rxq->stats);
 		rte_free(rxq);
 		return -ENOMEM;
 	}
@@ -693,6 +706,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	if (!mz) {
 		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for RX");
 		rte_free(rxq->sw_ring);
+		rte_free(rxq->stats);
 		rte_free(rxq);
 		return -ENOMEM;
 	}
@@ -1054,6 +1068,7 @@ iavf_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 	iavf_rxq_release_mbufs_ops[q->rel_mbufs_type].release_mbufs(q);
 	rte_free(q->sw_ring);
 	rte_memzone_free(q->mz);
+	rte_free(q->stats);
 	rte_free(q);
 }
 
@@ -1581,7 +1596,7 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
 		iavf_flex_rxd_to_vlan_tci(rxm, &rxd);
 		iavf_flex_rxd_to_ipsec_crypto_status(rxm, &rxd,
-				&rxq->stats.ipsec_crypto);
+				&rxq->stats->ipsec_crypto);
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, rxm, &rxd);
 		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
@@ -1750,7 +1765,7 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
 		iavf_flex_rxd_to_vlan_tci(first_seg, &rxd);
 		iavf_flex_rxd_to_ipsec_crypto_status(first_seg, &rxd,
-				&rxq->stats.ipsec_crypto);
+				&rxq->stats->ipsec_crypto);
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, first_seg, &rxd);
 		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
@@ -2034,7 +2049,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 				rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
 			iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]);
 			iavf_flex_rxd_to_ipsec_crypto_status(mb, &rxdp[j],
-				&rxq->stats.ipsec_crypto);
+				&rxq->stats->ipsec_crypto);
 			rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);
 			stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
 			pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index 0b5d67e718..62b5a67c84 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -268,7 +268,7 @@ struct iavf_rx_queue {
 	uint8_t proto_xtr; /* protocol extraction type */
 	uint64_t xtr_ol_flag;
 		/* flexible descriptor metadata extraction offload flag */
-	struct iavf_rx_queue_stats stats;
+	struct iavf_rx_queue_stats *stats;
 	uint64_t offloads;
 	uint64_t phc_time;
 	uint64_t hw_time_update;
-- 
2.47.1


* [PATCH v1 03/13] net/ixgbe: create common Rx queue structure
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 04/13] net/i40e: use the " Anatoly Burakov
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Bruce Richardson, Vladimir Medvedkin

In preparation for the deduplication effort, generalize the Rx queue structure.

Most of the fields are simply moved to common/rx.h, with comments clarified
where necessary. In a few cases a field is renamed as it is moved, to make it
more consistent with the rest of the codebase.

Specifically, the following fields are renamed:

- rdt_reg_addr -> qrx_tail (Rx ring tail register address)
- rx_using_sse -> vector_rx (indicates if vectorized path is enabled)
- mb_pool -> mp (other drivers use this name)

Additionally, some per-driver defines are now also moved to the
aforementioned common Rx header and re-defined in the driver in terms of the
common values.
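
For example, the Rx burst-size define now comes from the common header, and
the ixgbe-specific name simply aliases it (as the diff below shows):

    /* drivers/net/intel/common/rx.h */
    #define CI_RX_MAX_BURST 32

    /* drivers/net/intel/ixgbe/ixgbe_rxtx.h */
    #define RTE_PMD_IXGBE_RX_MAX_BURST CI_RX_MAX_BURST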

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h                 |  62 ++++++++
 drivers/net/intel/ixgbe/ixgbe_ethdev.c        |   8 +-
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    |   8 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx.c          | 149 +++++++++---------
 drivers/net/intel/ixgbe/ixgbe_rxtx.h          |  67 +-------
 .../net/intel/ixgbe/ixgbe_rxtx_vec_common.h   |   4 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c |  22 +--
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c  |  22 +--
 8 files changed, 172 insertions(+), 170 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index abb01ba5e7..524de39f9c 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -11,6 +11,68 @@
 #include <rte_ethdev.h>
 
 #define CI_RX_BURST 32
+#define CI_RX_MAX_BURST 32
+
+struct ci_rx_queue;
+
+struct ci_rx_entry {
+	struct rte_mbuf *mbuf; /* mbuf associated with RX descriptor. */
+};
+
+struct ci_rx_entry_sc {
+	struct rte_mbuf *fbuf; /* First segment of the fragmented packet.*/
+};
+
+/**
+ * Structure associated with each RX queue.
+ */
+struct ci_rx_queue {
+	struct rte_mempool  *mp; /**< mbuf pool to populate RX ring. */
+	union { /* RX ring virtual address */
+		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
+	};
+	volatile uint8_t *qrx_tail;   /**< register address of tail */
+	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
+	struct ci_rx_entry_sc *sw_sc_ring; /**< address of scattered Rx software ring. */
+	rte_iova_t rx_ring_phys_addr; /**< RX ring DMA address. */
+	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
+	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
+	/** hold packets to return to application */
+	struct rte_mbuf *rx_stage[CI_RX_MAX_BURST * 2];
+	uint16_t nb_rx_desc; /**< number of RX descriptors. */
+	uint16_t rx_tail;  /**< current value of tail register. */
+	uint16_t rx_nb_avail; /**< nr of staged pkts ready to ret to app */
+	uint16_t nb_rx_hold; /**< number of held free RX desc. */
+	uint16_t rx_next_avail; /**< idx of next staged pkt to ret to app */
+	uint16_t rx_free_thresh; /**< max free RX desc to hold. */
+	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
+	uint16_t rxrearm_nb;     /**< number of remaining to be re-armed */
+	uint16_t rxrearm_start;  /**< the idx we start the re-arming from */
+	uint16_t queue_id; /**< RX queue index. */
+	uint16_t port_id;  /**< Device port identifier. */
+	uint16_t reg_idx;  /**< RX queue register index. */
+	uint8_t crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
+	bool rx_deferred_start; /**< queue is not started on dev start. */
+	bool vector_rx; /**< indicates that vector RX is in use */
+	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
+	uint64_t mbuf_initializer; /**< value to init mbufs */
+	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
+	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
+	struct rte_mbuf fake_mbuf;
+	const struct rte_memzone *mz;
+	union {
+		struct { /* ixgbe specific values */
+			/** indicates that IPsec RX feature is in use */
+			uint8_t using_ipsec;
+			/** Packet type mask for different NICs. */
+			uint16_t pkt_type_mask;
+			/** UDP frames with a 0 checksum can be marked as checksum errors. */
+			uint8_t rx_udp_csum_zero_err;
+			/** flags to set in mbuf when a vlan is detected. */
+			uint64_t vlan_flags;
+		};
+	};
+};
 
 static inline uint16_t
 ci_rx_reassemble_packets(struct rte_mbuf **rx_bufs, uint16_t nb_bufs, uint8_t *split_flags,
diff --git a/drivers/net/intel/ixgbe/ixgbe_ethdev.c b/drivers/net/intel/ixgbe/ixgbe_ethdev.c
index f1fd271a0a..df1eecc3c1 100644
--- a/drivers/net/intel/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/intel/ixgbe/ixgbe_ethdev.c
@@ -2022,7 +2022,7 @@ ixgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev, uint16_t queue, bool on)
 {
 	struct ixgbe_hwstrip *hwstrip =
 		IXGBE_DEV_PRIVATE_TO_HWSTRIP_BITMAP(dev->data->dev_private);
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	if (queue >= IXGBE_MAX_RX_QUEUE_NUM)
 		return;
@@ -2157,7 +2157,7 @@ ixgbe_vlan_hw_strip_config(struct rte_eth_dev *dev)
 	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
 	uint32_t ctrl;
 	uint16_t i;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	bool on;
 
 	PMD_INIT_FUNC_TRACE();
@@ -2200,7 +2200,7 @@ ixgbe_config_vlan_strip_on_all_queues(struct rte_eth_dev *dev, int mask)
 {
 	uint16_t i;
 	struct rte_eth_rxmode *rxmode;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	if (mask & RTE_ETH_VLAN_STRIP_MASK) {
 		rxmode = &dev->data->dev_conf.rxmode;
@@ -5789,7 +5789,7 @@ ixgbevf_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on)
 static int
 ixgbevf_vlan_offload_config(struct rte_eth_dev *dev, int mask)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t i;
 	int on = 0;
 
diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index c1b086ef6d..1df1787c7f 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -11,15 +11,15 @@
 void
 ixgbe_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
-	struct ixgbe_rx_entry *rxep;
+	struct ci_rx_queue *rxq = rx_queue;
+	struct ci_rx_entry *rxep;
 	volatile union ixgbe_adv_rx_desc *rxdp;
 	uint16_t rx_id;
 	uint64_t paddr;
 	uint64_t dma_addr;
 	uint16_t i;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
 	rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
 	for (i = 0; i < nb_mbufs; i++) {
@@ -42,7 +42,7 @@ ixgbe_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 			(rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
+	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
 }
 
 uint16_t
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index 0c07ce3186..4e4afd81e4 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -1423,11 +1423,11 @@ int
 ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = &rxq->ixgbe_rx_ring[desc];
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.upper.status_error;
 
@@ -1567,10 +1567,10 @@ rx_desc_error_to_pkt_flags(uint32_t rx_status, uint16_t pkt_info,
 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
 #endif
 static inline int
-ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_scan_hw_ring(struct ci_rx_queue *rxq)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t pkt_len;
 	uint64_t pkt_flags;
@@ -1582,7 +1582,7 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
 	uint64_t vlan_flags = rxq->vlan_flags;
 
 	/* get references to current descriptor and S/W ring entry */
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = &rxq->ixgbe_rx_ring[rxq->rx_tail];
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	status = rxdp->wb.upper.status_error;
@@ -1667,10 +1667,10 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
 }
 
 static inline int
-ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
+ixgbe_rx_alloc_bufs(struct ci_rx_queue *rxq, bool reset_mbuf)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx;
 	__le64 dma_addr;
@@ -1679,12 +1679,12 @@ ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
 	/* allocate buffers in bulk directly into the S/W ring */
 	alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
 	rxep = &rxq->sw_ring[alloc_idx];
-	diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
+	diag = rte_mempool_get_bulk(rxq->mp, (void *)rxep,
 				    rxq->rx_free_thresh);
 	if (unlikely(diag != 0))
 		return -ENOMEM;
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = &rxq->ixgbe_rx_ring[alloc_idx];
 	for (i = 0; i < rxq->rx_free_thresh; ++i) {
 		/* populate the static rte mbuf fields */
 		mb = rxep[i].mbuf;
@@ -1711,7 +1711,7 @@ ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
 }
 
 static inline uint16_t
-ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+ixgbe_rx_fill_from_stage(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			 uint16_t nb_pkts)
 {
 	struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
@@ -1735,7 +1735,7 @@ static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	     uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	uint16_t nb_rx = 0;
 
 	/* Any previously recv'd pkts will be returned from the Rx stage */
@@ -1778,8 +1778,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 		/* update tail pointer */
 		rte_wmb();
-		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
-					    cur_free_trigger);
+		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->qrx_tail, cur_free_trigger);
 	}
 
 	if (rxq->rx_tail >= rxq->nb_rx_desc)
@@ -1825,11 +1824,11 @@ uint16_t
 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	volatile union ixgbe_adv_rx_desc *rx_ring;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *sw_ring;
-	struct ixgbe_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
 	union ixgbe_adv_rx_desc rxd;
@@ -1847,7 +1846,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
+	rx_ring = rxq->ixgbe_rx_ring;
 	sw_ring = rxq->sw_ring;
 	vlan_flags = rxq->vlan_flags;
 	while (nb_rx < nb_pkts) {
@@ -1908,7 +1907,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   (unsigned) rx_id, (unsigned) staterr,
 			   (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
 
-		nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (nmb == NULL) {
 			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
 				   "queue_id=%u", (unsigned) rxq->port_id,
@@ -2017,7 +2016,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   (unsigned) nb_rx);
 		rx_id = (uint16_t) ((rx_id == 0) ?
 				     (rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
+		IXGBE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 		nb_hold = 0;
 	}
 	rxq->nb_rx_hold = nb_hold;
@@ -2052,7 +2051,7 @@ static inline void
 ixgbe_fill_cluster_head_buf(
 	struct rte_mbuf *head,
 	union ixgbe_adv_rx_desc *desc,
-	struct ixgbe_rx_queue *rxq,
+	struct ci_rx_queue *rxq,
 	uint32_t staterr)
 {
 	uint32_t pkt_info;
@@ -2114,10 +2113,10 @@ static inline uint16_t
 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 		    bool bulk_alloc)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
-	volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
-	struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
-	struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ixgbe_adv_rx_desc *rx_ring = rxq->ixgbe_rx_ring;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry_sc *sw_sc_ring = rxq->sw_sc_ring;
 	uint16_t rx_id = rxq->rx_tail;
 	uint16_t nb_rx = 0;
 	uint16_t nb_hold = rxq->nb_rx_hold;
@@ -2125,10 +2124,10 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 
 	while (nb_rx < nb_pkts) {
 		bool eop;
-		struct ixgbe_rx_entry *rxe;
-		struct ixgbe_scattered_rx_entry *sc_entry;
-		struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
-		struct ixgbe_rx_entry *next_rxe = NULL;
+		struct ci_rx_entry *rxe;
+		struct ci_rx_entry_sc *sc_entry;
+		struct ci_rx_entry_sc *next_sc_entry = NULL;
+		struct ci_rx_entry *next_rxe = NULL;
 		struct rte_mbuf *first_seg;
 		struct rte_mbuf *rxm;
 		struct rte_mbuf *nmb = NULL;
@@ -2165,7 +2164,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 			   rte_le_to_cpu_16(rxd.wb.upper.length));
 
 		if (!bulk_alloc) {
-			nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
+			nmb = rte_mbuf_raw_alloc(rxq->mp);
 			if (nmb == NULL) {
 				PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
 						  "port_id=%u queue_id=%u",
@@ -2181,7 +2180,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 			if (!ixgbe_rx_alloc_bufs(rxq, false)) {
 				rte_wmb();
 				IXGBE_PCI_REG_WC_WRITE_RELAXED(
-							rxq->rdt_reg_addr,
+							rxq->qrx_tail,
 							next_rdt);
 				nb_hold -= rxq->rx_free_thresh;
 			} else {
@@ -2347,7 +2346,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 			   rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
 
 		rte_wmb();
-		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
+		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->qrx_tail, prev_id);
 		nb_hold = 0;
 	}
 
@@ -2969,12 +2968,12 @@ ixgbe_free_sc_cluster(struct rte_mbuf *m)
 }
 
 static void __rte_cold
-ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release_mbufs(struct ci_rx_queue *rxq)
 {
 	unsigned i;
 
 	/* SSE Vector driver has a different way of releasing mbufs. */
-	if (rxq->rx_using_sse) {
+	if (rxq->vector_rx) {
 		ixgbe_rx_queue_release_mbufs_vec(rxq);
 		return;
 	}
@@ -3006,7 +3005,7 @@ ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
 }
 
 static void __rte_cold
-ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release(struct ci_rx_queue *rxq)
 {
 	if (rxq != NULL) {
 		ixgbe_rx_queue_release_mbufs(rxq);
@@ -3032,7 +3031,7 @@ ixgbe_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
  *           function must be used.
  */
 static inline int __rte_cold
-check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
+check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
 {
 	int ret = 0;
 
@@ -3069,7 +3068,7 @@ check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
 
 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
 static void __rte_cold
-ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
+ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ci_rx_queue *rxq)
 {
 	static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
 	unsigned i;
@@ -3090,7 +3089,7 @@ ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
 	 * reads extra memory as zeros.
 	 */
 	for (i = 0; i < len; i++) {
-		rxq->rx_ring[i] = zeroed_desc;
+		rxq->ixgbe_rx_ring[i] = zeroed_desc;
 	}
 
 	/*
@@ -3205,7 +3204,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 			 struct rte_mempool *mp)
 {
 	const struct rte_memzone *rz;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ixgbe_hw     *hw;
 	uint16_t len;
 	struct ixgbe_adapter *adapter = dev->data->dev_private;
@@ -3234,11 +3233,11 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	}
 
 	/* First allocate the rx queue data structure */
-	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
+	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE, socket_id);
 	if (rxq == NULL)
 		return -ENOMEM;
-	rxq->mb_pool = mp;
+	rxq->mp = mp;
 	rxq->nb_rx_desc = nb_desc;
 	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
 	rxq->queue_id = queue_idx;
@@ -3297,14 +3296,14 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	 * Modified to setup VFRDT for Virtual Function
 	 */
 	if (ixgbe_is_vf(dev))
-		rxq->rdt_reg_addr =
+		rxq->qrx_tail =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
 	else
-		rxq->rdt_reg_addr =
+		rxq->qrx_tail =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
 
 	rxq->rx_ring_phys_addr = rz->iova;
-	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
+	rxq->ixgbe_rx_ring = (union ixgbe_adv_rx_desc *)rz->addr;
 
 	/*
 	 * Certain constraints must be met in order to use the bulk buffer
@@ -3329,7 +3328,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 		len += RTE_PMD_IXGBE_RX_MAX_BURST;
 
 	rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
-					  sizeof(struct ixgbe_rx_entry) * len,
+					  sizeof(struct ci_rx_entry) * len,
 					  RTE_CACHE_LINE_SIZE, socket_id);
 	if (!rxq->sw_ring) {
 		ixgbe_rx_queue_release(rxq);
@@ -3346,7 +3345,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	 */
 	rxq->sw_sc_ring =
 		rte_zmalloc_socket("rxq->sw_sc_ring",
-				   sizeof(struct ixgbe_scattered_rx_entry) * len,
+				   sizeof(struct ci_rx_entry_sc) * len,
 				   RTE_CACHE_LINE_SIZE, socket_id);
 	if (!rxq->sw_sc_ring) {
 		ixgbe_rx_queue_release(rxq);
@@ -3355,7 +3354,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 
 	PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
 			    "dma_addr=0x%"PRIx64,
-		     rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
+		     rxq->sw_ring, rxq->sw_sc_ring, rxq->ixgbe_rx_ring,
 		     rxq->rx_ring_phys_addr);
 
 	if (!rte_is_power_of_2(nb_desc)) {
@@ -3379,11 +3378,11 @@ ixgbe_dev_rx_queue_count(void *rx_queue)
 {
 #define IXGBE_RXQ_SCAN_INTERVAL 4
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &(rxq->rx_ring[rxq->rx_tail]);
+	rxdp = &rxq->ixgbe_rx_ring[rxq->rx_tail];
 
 	while ((desc < rxq->nb_rx_desc) &&
 		(rxdp->wb.upper.status_error &
@@ -3391,7 +3390,7 @@ ixgbe_dev_rx_queue_count(void *rx_queue)
 		desc += IXGBE_RXQ_SCAN_INTERVAL;
 		rxdp += IXGBE_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
+			rxdp = &(rxq->ixgbe_rx_ring[rxq->rx_tail +
 				desc - rxq->nb_rx_desc]);
 	}
 
@@ -3401,7 +3400,7 @@ ixgbe_dev_rx_queue_count(void *rx_queue)
 int
 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile uint32_t *status;
 	uint32_t nb_hold, desc;
 
@@ -3409,7 +3408,7 @@ ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 		return -EINVAL;
 
 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
-	if (rxq->rx_using_sse)
+	if (rxq->vector_rx)
 		nb_hold = rxq->rxrearm_nb;
 	else
 #endif
@@ -3421,7 +3420,7 @@ ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	status = &rxq->rx_ring[desc].wb.upper.status_error;
+	status = &rxq->ixgbe_rx_ring[desc].wb.upper.status_error;
 	if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
 		return RTE_ETH_RX_DESC_DONE;
 
@@ -3506,7 +3505,7 @@ ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
 	}
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 
 		if (rxq != NULL) {
 			ixgbe_rx_queue_release_mbufs(rxq);
@@ -4668,16 +4667,16 @@ ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
 }
 
 static int __rte_cold
-ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
+ixgbe_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq)
 {
-	struct ixgbe_rx_entry *rxe = rxq->sw_ring;
+	struct ci_rx_entry *rxe = rxq->sw_ring;
 	uint64_t dma_addr;
 	unsigned int i;
 
 	/* Initialize software ring entries */
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
 		volatile union ixgbe_adv_rx_desc *rxd;
-		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
+		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mp);
 
 		if (mbuf == NULL) {
 			PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
@@ -4690,7 +4689,7 @@ ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
 
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
-		rxd = &rxq->rx_ring[i];
+		rxd = &rxq->ixgbe_rx_ring[i];
 		rxd->read.hdr_addr = 0;
 		rxd->read.pkt_addr = dma_addr;
 		rxe[i].mbuf = mbuf;
@@ -5109,9 +5108,9 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 		dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 
-		rxq->rx_using_sse = rx_using_sse;
+		rxq->vector_rx = rx_using_sse;
 #ifdef RTE_LIB_SECURITY
 		rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
 				RTE_ETH_RX_OFFLOAD_SECURITY);
@@ -5187,7 +5186,7 @@ ixgbe_set_rsc(struct rte_eth_dev *dev)
 
 	/* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		uint32_t srrctl =
 			IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
 		uint32_t rscctl =
@@ -5217,7 +5216,7 @@ ixgbe_set_rsc(struct rte_eth_dev *dev)
 		 */
 
 		rscctl |= IXGBE_RSCCTL_RSCEN;
-		rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
+		rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mp);
 		psrtype |= IXGBE_PSRTYPE_TCPHDR;
 
 		/*
@@ -5263,7 +5262,7 @@ int __rte_cold
 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint64_t bus_addr;
 	uint32_t rxctrl;
 	uint32_t fctrl;
@@ -5374,7 +5373,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
 		 * The value is in 1 KB resolution. Valid values can be from
 		 * 1 KB to 16 KB.
 		 */
-		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
+		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
 			RTE_PKTMBUF_HEADROOM);
 		srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
 			   IXGBE_SRRCTL_BSIZEPKT_MASK);
@@ -5559,7 +5558,7 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
 	struct ci_tx_queue *txq;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t txdctl;
 	uint32_t dmatxctl;
 	uint32_t rxctrl;
@@ -5646,7 +5645,7 @@ int __rte_cold
 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ixgbe_hw     *hw;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t rxdctl;
 	int poll_ms;
 
@@ -5689,7 +5688,7 @@ ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ixgbe_hw     *hw;
 	struct ixgbe_adapter *adapter = dev->data->dev_private;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t rxdctl;
 	int poll_ms;
 
@@ -5823,11 +5822,11 @@ void
 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
-	qinfo->mp = rxq->mb_pool;
+	qinfo->mp = rxq->mp;
 	qinfo->scattered_rx = dev->data->scattered_rx;
 	qinfo->nb_desc = rxq->nb_rx_desc;
 
@@ -5861,13 +5860,13 @@ void
 ixgbe_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ixgbe_adapter *adapter = dev->data->dev_private;
 
 	rxq = dev->data->rx_queues[queue_id];
 
 	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
-	recycle_rxq_info->mp = rxq->mb_pool;
+	recycle_rxq_info->mp = rxq->mp;
 	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
 	recycle_rxq_info->receive_tail = &rxq->rx_tail;
 
@@ -5889,7 +5888,7 @@ int __rte_cold
 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
 	uint32_t frame_size = dev->data->mtu + IXGBE_ETH_OVERHEAD;
 	uint64_t bus_addr;
@@ -5972,7 +5971,7 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		 * The value is in 1 KB resolution. Valid values can be from
 		 * 1 KB to 16 KB.
 		 */
-		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
+		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
 			RTE_PKTMBUF_HEADROOM);
 		srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
 			   IXGBE_SRRCTL_BSIZEPKT_MASK);
@@ -6076,7 +6075,7 @@ ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
 	struct ci_tx_queue *txq;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t txdctl;
 	uint32_t rxdctl;
 	uint16_t i;
@@ -6270,7 +6269,7 @@ ixgbe_recv_scattered_pkts_vec(
 }
 
 int
-ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
+ixgbe_rxq_vec_setup(struct ci_rx_queue __rte_unused * rxq)
 {
 	return -1;
 }
@@ -6290,7 +6289,7 @@ ixgbe_txq_vec_setup(struct ci_tx_queue *txq __rte_unused)
 }
 
 void
-ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
+ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue __rte_unused * rxq)
 {
 	return;
 }
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index 20a5c5a0af..84e28eb254 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -5,6 +5,7 @@
 #ifndef _IXGBE_RXTX_H_
 #define _IXGBE_RXTX_H_
 
+#include "../common/rx.h"
 #include "../common/tx.h"
 
 /*
@@ -30,7 +31,7 @@
 #define	IXGBE_MAX_RING_DESC	8192
 
 #define RTE_PMD_IXGBE_TX_MAX_BURST 32
-#define RTE_PMD_IXGBE_RX_MAX_BURST 32
+#define RTE_PMD_IXGBE_RX_MAX_BURST CI_RX_MAX_BURST
 #define RTE_IXGBE_TX_MAX_FREE_BUF_SZ 64
 
 #define RTE_IXGBE_DESCS_PER_LOOP    4
@@ -66,66 +67,6 @@
 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
 #define IXGBE_PACKET_TYPE_SHIFT             0X04
 
-/**
- * Structure associated with each descriptor of the RX ring of a RX queue.
- */
-struct ixgbe_rx_entry {
-	struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
-};
-
-struct ixgbe_scattered_rx_entry {
-	struct rte_mbuf *fbuf; /**< First segment of the fragmented packet. */
-};
-
-/**
- * Structure associated with each RX queue.
- */
-struct ixgbe_rx_queue {
-	struct rte_mempool  *mb_pool; /**< mbuf pool to populate RX ring. */
-	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
-	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
-	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
-	struct ixgbe_rx_entry *sw_ring; /**< address of RX software ring. */
-	struct ixgbe_scattered_rx_entry *sw_sc_ring; /**< address of scattered Rx software ring. */
-	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
-	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
-	uint64_t            mbuf_initializer; /**< value to init mbufs */
-	uint16_t            nb_rx_desc; /**< number of RX descriptors. */
-	uint16_t            rx_tail;  /**< current value of RDT register. */
-	uint16_t            nb_rx_hold; /**< number of held free RX desc. */
-	uint16_t rx_nb_avail; /**< nr of staged pkts ready to ret to app */
-	uint16_t rx_next_avail; /**< idx of next staged pkt to ret to app */
-	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	uint8_t            rx_using_sse;
-	/**< indicates that vector RX is in use */
-#ifdef RTE_LIB_SECURITY
-	uint8_t            using_ipsec;
-	/**< indicates that IPsec RX feature is in use */
-#endif
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
-	uint16_t            rxrearm_nb;     /**< number of remaining to be re-armed */
-	uint16_t            rxrearm_start;  /**< the idx we start the re-arming from */
-#endif
-	uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
-	uint16_t            queue_id; /**< RX queue index. */
-	uint16_t            reg_idx;  /**< RX queue register index. */
-	uint16_t            pkt_type_mask;  /**< Packet type mask for different NICs. */
-	uint16_t            port_id;  /**< Device port identifier. */
-	uint8_t             crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
-	uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
-	uint8_t             rx_deferred_start; /**< not in global dev start. */
-	/** UDP frames with a 0 checksum can be marked as checksum errors. */
-	uint8_t             rx_udp_csum_zero_err;
-	/** flags to set in mbuf when a vlan is detected. */
-	uint64_t            vlan_flags;
-	uint64_t	    offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
-	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
-	struct rte_mbuf fake_mbuf;
-	/** hold packets to return to application */
-	struct rte_mbuf *rx_stage[RTE_PMD_IXGBE_RX_MAX_BURST*2];
-	const struct rte_memzone *mz;
-};
-
 /**
  * IXGBE CTX Constants
  */
@@ -230,8 +171,8 @@ uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 uint16_t ixgbe_recv_scattered_pkts_vec(void *rx_queue,
 		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
 int ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);
-int ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq);
-void ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq);
+int ixgbe_rxq_vec_setup(struct ci_rx_queue *rxq);
+void ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq);
 int ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt);
 
 extern const uint32_t ptype_table[IXGBE_PACKET_TYPE_MAX];
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
index 018010820f..0ba3d7a4c0 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
@@ -69,7 +69,7 @@ ixgbe_tx_free_bufs(struct ci_tx_queue *txq)
 }
 
 static inline void
-_ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
+_ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	unsigned int i;
 
@@ -173,7 +173,7 @@ ixgbe_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
 		return -1;
 
 	for (uint16_t i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		if (!rxq)
 			continue;
 		if (!ci_rxq_vec_capable(rxq->nb_rx_desc, rxq->rx_free_thresh, rxq->offloads))
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
index 9ccd8eba25..630a2e6a1d 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -12,22 +12,22 @@
 #include "ixgbe_rxtx_vec_common.h"
 
 static inline void
-ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	uint64x2_t dma_addr0, dma_addr1;
 	uint64x2_t zero = vdupq_n_u64(0);
 	uint64_t paddr;
 	uint8x8_t p;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mb_pool,
+	if (unlikely(rte_mempool_get_bulk(rxq->mp,
 					  (void *)rxep,
 					  RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
 		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
@@ -76,7 +76,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
+	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
 }
 
 static inline void
@@ -282,11 +282,11 @@ desc_to_ptype_v(uint64x2_t descs[4], uint16_t pkt_type_mask,
  * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint8x16_t shuf_msk = {
@@ -309,7 +309,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rx_tail;
 
 	rte_prefetch_non_temporal(rxdp);
 
@@ -488,7 +488,7 @@ static uint16_t
 ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -634,7 +634,7 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_ixgbe_rx_queue_release_mbufs_vec(rxq);
 }
@@ -657,7 +657,7 @@ static const struct ixgbe_txq_ops vec_txq_ops = {
 };
 
 int __rte_cold
-ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
index e125f52cc5..ecfb0d6ba6 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -13,12 +13,12 @@
 #include <rte_vect.h>
 
 static inline void
-ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
@@ -26,10 +26,10 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 
 	const __m128i hba_msk = _mm_set_epi64x(0, UINT64_MAX);
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mb_pool,
+	if (rte_mempool_get_bulk(rxq->mp,
 				 (void *)rxep,
 				 RTE_IXGBE_RXQ_REARM_THRESH) < 0) {
 		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
@@ -86,7 +86,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
+	IXGBE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 }
 
 #ifdef RTE_LIB_SECURITY
@@ -327,11 +327,11 @@ desc_to_ptype_v(__m128i descs[4], uint16_t pkt_type_mask,
  * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 #ifdef RTE_LIB_SECURITY
 	uint8_t use_ipsec = rxq->using_ipsec;
@@ -377,7 +377,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rx_tail;
 
 	rte_prefetch0(rxdp);
 
@@ -609,7 +609,7 @@ static uint16_t
 ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -755,7 +755,7 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_ixgbe_rx_queue_release_mbufs_vec(rxq);
 }
@@ -778,7 +778,7 @@ static const struct ixgbe_txq_ops vec_txq_ops = {
 };
 
 int __rte_cold
-ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
-- 
2.47.1


* [PATCH v1 04/13] net/i40e: use the common Rx queue structure
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 05/13] net/ice: " Anatoly Burakov
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes

Make the i40e driver use the new common Rx queue structure.

Because the i40e driver supports both 16-byte and 32-byte descriptor formats
(controlled by the RTE_LIBRTE_I40E_16BYTE_RX_DESC define), the common queue
structure has to take that into account: it carries ring pointers for both
formats, and the actual descriptor format is picked by i40e at compile time
using the above macro. Direct accesses to the Rx ring are now replaced with
macro accesses that take the descriptor size into account.
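
A sketch of the shape such an accessor macro could take (the exact
definition lives in the i40e headers and may differ; this is only to
illustrate the idea, using the union members added to common/rx.h below):

    #ifdef RTE_LIBRTE_I40E_16BYTE_RX_DESC
    #define I40E_RX_RING(rxq)   ((rxq)->i40e_rx_16b_ring)
    #else
    #define I40E_RX_RING(rxq)   ((rxq)->i40e_rx_32b_ring)
    #endif
    /* pointer to descriptor 'idx', regardless of descriptor size */
    #define I40E_RX_RING_PTR(rxq, idx)   (&I40E_RX_RING(rxq)[(idx)])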

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h                 |  14 ++
 drivers/net/intel/i40e/i40e_ethdev.c          |   4 +-
 drivers/net/intel/i40e/i40e_ethdev.h          |   4 +-
 drivers/net/intel/i40e/i40e_fdir.c            |  16 +--
 .../i40e/i40e_recycle_mbufs_vec_common.c      |   6 +-
 drivers/net/intel/i40e/i40e_rxtx.c            | 126 +++++++++---------
 drivers/net/intel/i40e/i40e_rxtx.h            |  74 +++-------
 drivers/net/intel/i40e/i40e_rxtx_common_avx.h |   6 +-
 .../net/intel/i40e/i40e_rxtx_vec_altivec.c    |  22 +--
 drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |  12 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |  12 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_common.h |   4 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   |  24 ++--
 drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    |  24 ++--
 14 files changed, 160 insertions(+), 188 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index 524de39f9c..db49db57d0 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -30,6 +30,8 @@ struct ci_rx_queue {
 	struct rte_mempool  *mp; /**< mbuf pool to populate RX ring. */
 	union { /* RX ring virtual address */
 		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
+		volatile union i40e_16byte_rx_desc *i40e_rx_16b_ring;
+		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
@@ -51,14 +53,22 @@ struct ci_rx_queue {
 	uint16_t queue_id; /**< RX queue index. */
 	uint16_t port_id;  /**< Device port identifier. */
 	uint16_t reg_idx;  /**< RX queue register index. */
+	uint16_t rx_buf_len; /* The packet buffer size */
+	uint16_t rx_hdr_len; /* The header buffer size */
+	uint16_t max_pkt_len; /* Maximum packet length */
 	uint8_t crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
+	bool q_set; /**< indicate if rx queue has been configured */
 	bool rx_deferred_start; /**< queue is not started on dev start. */
+	bool fdir_enabled; /* 0 if FDIR disabled, 1 when enabled */
 	bool vector_rx; /**< indicates that vector RX is in use */
 	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
 	uint64_t mbuf_initializer; /**< value to init mbufs */
 	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
 	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
 	struct rte_mbuf fake_mbuf;
+	union { /* the VSI this queue belongs to */
+		struct i40e_vsi *i40e_vsi;
+	};
 	const struct rte_memzone *mz;
 	union {
 		struct { /* ixgbe specific values */
@@ -71,6 +81,10 @@ struct ci_rx_queue {
 			/** flags to set in mbuf when a vlan is detected. */
 			uint64_t vlan_flags;
 		};
+		struct { /* i40e specific values */
+			uint8_t hs_mode; /**< Header Split mode */
+			uint8_t dcb_tc; /**< Traffic class of rx queue */
+		};
 	};
 };
 
diff --git a/drivers/net/intel/i40e/i40e_ethdev.c b/drivers/net/intel/i40e/i40e_ethdev.c
index 90eba3419f..e0a865845b 100644
--- a/drivers/net/intel/i40e/i40e_ethdev.c
+++ b/drivers/net/intel/i40e/i40e_ethdev.c
@@ -6609,7 +6609,7 @@ i40e_dev_rx_init(struct i40e_pf *pf)
 	struct rte_eth_dev_data *data = pf->dev_data;
 	int ret = I40E_SUCCESS;
 	uint16_t i;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	i40e_pf_config_rss(pf);
 	for (i = 0; i < data->nb_rx_queues; i++) {
@@ -8974,7 +8974,7 @@ i40e_pf_calc_configured_queues_num(struct i40e_pf *pf)
 {
 	struct rte_eth_dev_data *data = pf->dev_data;
 	int i, num;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	num = 0;
 	for (i = 0; i < pf->lan_nb_qps; i++) {
diff --git a/drivers/net/intel/i40e/i40e_ethdev.h b/drivers/net/intel/i40e/i40e_ethdev.h
index ccc8732d7d..44864292d0 100644
--- a/drivers/net/intel/i40e/i40e_ethdev.h
+++ b/drivers/net/intel/i40e/i40e_ethdev.h
@@ -333,7 +333,7 @@ struct i40e_vsi_list {
 	struct i40e_vsi *vsi;
 };
 
-struct i40e_rx_queue;
+struct ci_rx_queue;
 struct ci_tx_queue;
 
 /* Bandwidth limit information */
@@ -739,7 +739,7 @@ struct i40e_fdir_info {
 	struct i40e_vsi *fdir_vsi;     /* pointer to fdir VSI structure */
 	uint16_t match_counter_index;  /* Statistic counter index used for fdir*/
 	struct ci_tx_queue *txq;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	void *prg_pkt[I40E_FDIR_PRG_PKT_CNT];     /* memory for fdir program packet */
 	uint64_t dma_addr[I40E_FDIR_PRG_PKT_CNT]; /* physic address of packet memory*/
 	/*
diff --git a/drivers/net/intel/i40e/i40e_fdir.c b/drivers/net/intel/i40e/i40e_fdir.c
index 94e3ab44e3..eadcf63d1d 100644
--- a/drivers/net/intel/i40e/i40e_fdir.c
+++ b/drivers/net/intel/i40e/i40e_fdir.c
@@ -100,9 +100,9 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 				  bool add, bool wait_status);
 
 static int
-i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
+i40e_fdir_rx_queue_init(struct ci_rx_queue *rxq)
 {
-	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
+	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->i40e_vsi);
 	struct i40e_hmc_obj_rxq rx_ctx;
 	int err = I40E_SUCCESS;
 
@@ -139,7 +139,7 @@ i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
 		return err;
 	}
 	rxq->qrx_tail = hw->hw_addr +
-		I40E_QRX_TAIL(rxq->vsi->base_queue);
+		I40E_QRX_TAIL(rxq->i40e_vsi->base_queue);
 
 	rte_wmb();
 	/* Init the RX tail register. */
@@ -382,7 +382,7 @@ i40e_fdir_rx_proc_enable(struct rte_eth_dev *dev, bool on)
 	int32_t i;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		if (!rxq)
 			continue;
 		rxq->fdir_enabled = on;
@@ -929,7 +929,7 @@ i40e_build_ctob(uint32_t td_cmd,
  * tx queue
  */
 static inline int
-i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
+i40e_check_fdir_programming_status(struct ci_rx_queue *rxq)
 {
 	volatile union i40e_rx_desc *rxdp;
 	uint64_t qword1;
@@ -938,7 +938,7 @@ i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
 	uint32_t error;
 	int ret = 0;
 
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
 	rx_status = (qword1 & I40E_RXD_QW1_STATUS_MASK)
 			>> I40E_RXD_QW1_STATUS_SHIFT;
@@ -987,7 +987,7 @@ i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
 }
 
 static inline void
-i40e_fdir_programming_status_cleanup(struct i40e_rx_queue *rxq)
+i40e_fdir_programming_status_cleanup(struct ci_rx_queue *rxq)
 {
 	uint16_t retry_count = 0;
 
@@ -1627,7 +1627,7 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 				  bool add, bool wait_status)
 {
 	struct ci_tx_queue *txq = pf->fdir.txq;
-	struct i40e_rx_queue *rxq = pf->fdir.rxq;
+	struct ci_rx_queue *rxq = pf->fdir.rxq;
 	const struct i40e_fdir_action *fdir_action = &filter->action;
 	volatile struct i40e_tx_desc *txdp;
 	volatile struct i40e_filter_program_desc *fdirdp;
diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
index 2875c578af..aa7703216d 100644
--- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
@@ -13,15 +13,15 @@
 void
 i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
-	struct i40e_rx_entry *rxep;
+	struct ci_rx_queue *rxq = rx_queue;
+	struct ci_rx_entry *rxep;
 	volatile union i40e_rx_desc *rxdp;
 	uint16_t rx_id;
 	uint64_t paddr;
 	uint64_t dma_addr;
 	uint16_t i;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 	rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
 	for (i = 0; i < nb_mbufs; i++) {
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index c3ff2e05c3..96490296ba 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -94,12 +94,12 @@ i40e_monitor_callback(const uint64_t value,
 int
 i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile union i40e_rx_desc *rxdp;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = I40E_RX_RING_PTR(rxq, desc);
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.qword1.status_error_len;
 
@@ -416,9 +416,9 @@ i40e_xmit_cleanup(struct ci_tx_queue *txq)
 
 static inline int
 #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
-check_rx_burst_bulk_alloc_preconditions(struct i40e_rx_queue *rxq)
+check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
 #else
-check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq)
+check_rx_burst_bulk_alloc_preconditions(__rte_unused struct ci_rx_queue *rxq)
 #endif
 {
 	int ret = 0;
@@ -456,10 +456,10 @@ check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq)
 #error "PMD I40E: I40E_LOOK_AHEAD must be 8\n"
 #endif
 static inline int
-i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
+i40e_rx_scan_hw_ring(struct ci_rx_queue *rxq)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t pkt_len;
 	uint64_t qword1;
@@ -467,9 +467,9 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
 	int32_t s[I40E_LOOK_AHEAD], var, nb_dd;
 	int32_t i, j, nb_rx = 0;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
@@ -558,7 +558,7 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
 }
 
 static inline uint16_t
-i40e_rx_fill_from_stage(struct i40e_rx_queue *rxq,
+i40e_rx_fill_from_stage(struct ci_rx_queue *rxq,
 			struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
@@ -577,10 +577,10 @@ i40e_rx_fill_from_stage(struct i40e_rx_queue *rxq,
 }
 
 static inline int
-i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
+i40e_rx_alloc_bufs(struct ci_rx_queue *rxq)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx, i;
 	uint64_t dma_addr;
@@ -597,7 +597,7 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
 		return -ENOMEM;
 	}
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = I40E_RX_RING_PTR(rxq, alloc_idx);
 	for (i = 0; i < rxq->rx_free_thresh; i++) {
 		if (likely(i < (rxq->rx_free_thresh - 1)))
 			/* Prefetch next mbuf */
@@ -629,7 +629,7 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
 static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = (struct i40e_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	struct rte_eth_dev *dev;
 	uint16_t nb_rx = 0;
 
@@ -648,7 +648,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		if (i40e_rx_alloc_bufs(rxq) != 0) {
 			uint16_t i, j;
 
-			dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+			dev = I40E_VSI_TO_ETH_DEV(rxq->i40e_vsi);
 			dev->data->rx_mbuf_alloc_failed +=
 				rxq->rx_free_thresh;
 
@@ -707,12 +707,12 @@ i40e_recv_pkts_bulk_alloc(void __rte_unused *rx_queue,
 uint16_t
 i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	volatile union i40e_rx_desc *rx_ring;
 	volatile union i40e_rx_desc *rxdp;
 	union i40e_rx_desc rxd;
-	struct i40e_rx_entry *sw_ring;
-	struct i40e_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_eth_dev *dev;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
@@ -729,9 +729,9 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
+	rx_ring = I40E_RX_RING(rxq);
 	sw_ring = rxq->sw_ring;
-	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -745,7 +745,7 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+			dev = I40E_VSI_TO_ETH_DEV(rxq->i40e_vsi);
 			dev->data->rx_mbuf_alloc_failed++;
 			break;
 		}
@@ -837,12 +837,12 @@ i40e_recv_scattered_pkts(void *rx_queue,
 			 struct rte_mbuf **rx_pkts,
 			 uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
-	volatile union i40e_rx_desc *rx_ring = rxq->rx_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union i40e_rx_desc *rx_ring = I40E_RX_RING(rxq);
 	volatile union i40e_rx_desc *rxdp;
 	union i40e_rx_desc rxd;
-	struct i40e_rx_entry *sw_ring = rxq->sw_ring;
-	struct i40e_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb, *rxm;
@@ -853,7 +853,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
 	uint64_t qword1;
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -867,7 +867,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
 
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+			dev = I40E_VSI_TO_ETH_DEV(rxq->i40e_vsi);
 			dev->data->rx_mbuf_alloc_failed++;
 			break;
 		}
@@ -1798,7 +1798,7 @@ i40e_get_queue_offset_by_qindex(struct i40e_pf *pf, uint16_t queue_idx)
 int
 i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -1841,7 +1841,7 @@ i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 int
 i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -2004,7 +2004,7 @@ i40e_dev_first_queue(uint16_t idx, void **queues, int num)
 
 static int
 i40e_dev_rx_queue_setup_runtime(struct rte_eth_dev *dev,
-				struct i40e_rx_queue *rxq)
+				struct ci_rx_queue *rxq)
 {
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
@@ -2081,7 +2081,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct i40e_vsi *vsi;
 	struct i40e_pf *pf = NULL;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz;
 	uint32_t ring_size;
 	uint16_t len, i;
@@ -2116,7 +2116,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate the rx queue data structure */
 	rxq = rte_zmalloc_socket("i40e rx queue",
-				 sizeof(struct i40e_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 socket_id);
 	if (!rxq) {
@@ -2135,7 +2135,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	else
 		rxq->crc_len = 0;
 	rxq->drop_en = rx_conf->rx_drop_en;
-	rxq->vsi = vsi;
+	rxq->i40e_vsi = vsi;
 	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 	rxq->offloads = offloads;
 
@@ -2164,14 +2164,14 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	memset(rz->addr, 0, ring_size);
 
 	rxq->rx_ring_phys_addr = rz->iova;
-	rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
+	I40E_RX_RING(rxq) = (union i40e_rx_desc *)rz->addr;
 
 	len = (uint16_t)(nb_desc + RTE_PMD_I40E_RX_MAX_BURST);
 
 	/* Allocate the software ring. */
 	rxq->sw_ring =
 		rte_zmalloc_socket("i40e rx sw ring",
-				   sizeof(struct i40e_rx_entry) * len,
+				   sizeof(struct ci_rx_entry) * len,
 				   RTE_CACHE_LINE_SIZE,
 				   socket_id);
 	if (!rxq->sw_ring) {
@@ -2242,7 +2242,7 @@ i40e_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 void
 i40e_rx_queue_release(void *rxq)
 {
-	struct i40e_rx_queue *q = (struct i40e_rx_queue *)rxq;
+	struct ci_rx_queue *q = (struct ci_rx_queue *)rxq;
 
 	if (!q) {
 		PMD_DRV_LOG(DEBUG, "Pointer to rxq is NULL");
@@ -2260,11 +2260,11 @@ i40e_dev_rx_queue_count(void *rx_queue)
 {
 #define I40E_RXQ_SCAN_INTERVAL 4
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &(rxq->rx_ring[rxq->rx_tail]);
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	while ((desc < rxq->nb_rx_desc) &&
 		((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
 		I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT) &
@@ -2277,8 +2277,8 @@ i40e_dev_rx_queue_count(void *rx_queue)
 		desc += I40E_RXQ_SCAN_INTERVAL;
 		rxdp += I40E_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
-					desc - rxq->nb_rx_desc]);
+			rxdp = I40E_RX_RING_PTR(rxq,
+					rxq->rx_tail + desc - rxq->nb_rx_desc);
 	}
 
 	return desc;
@@ -2287,7 +2287,7 @@ i40e_dev_rx_queue_count(void *rx_queue)
 int
 i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile uint64_t *status;
 	uint64_t mask;
 	uint32_t desc;
@@ -2302,7 +2302,7 @@ i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	status = &rxq->rx_ring[desc].wb.qword1.status_error_len;
+	status = &I40E_RX_RING_PTR(rxq, desc)->wb.qword1.status_error_len;
 	mask = rte_le_to_cpu_64((1ULL << I40E_RX_DESC_STATUS_DD_SHIFT)
 		<< I40E_RXD_QW1_STATUS_SHIFT);
 	if (*status & mask)
@@ -2628,12 +2628,12 @@ i40e_memzone_reserve(const char *name, uint32_t len, int socket_id)
 }
 
 void
-i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs(struct ci_rx_queue *rxq)
 {
 	uint16_t i;
 
 	/* SSE Vector driver has a different way of releasing mbufs. */
-	if (rxq->rx_using_sse) {
+	if (rxq->vector_rx) {
 		i40e_rx_queue_release_mbufs_vec(rxq);
 		return;
 	}
@@ -2663,7 +2663,7 @@ i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
 }
 
 void
-i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
+i40e_reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	unsigned i;
 	uint16_t len;
@@ -2681,7 +2681,7 @@ i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
 		len = rxq->nb_rx_desc;
 
 	for (i = 0; i < len * sizeof(union i40e_rx_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)I40E_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 	for (i = 0; i < RTE_PMD_I40E_RX_MAX_BURST; ++i)
@@ -2898,9 +2898,9 @@ i40e_tx_queue_init(struct ci_tx_queue *txq)
 }
 
 int
-i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
+i40e_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq)
 {
-	struct i40e_rx_entry *rxe = rxq->sw_ring;
+	struct ci_rx_entry *rxe = rxq->sw_ring;
 	uint64_t dma_addr;
 	uint16_t i;
 
@@ -2922,7 +2922,7 @@ i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
-		rxd = &rxq->rx_ring[i];
+		rxd = I40E_RX_RING_PTR(rxq, i);
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
@@ -2941,10 +2941,10 @@ i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
  * and maximum packet length.
  */
 static int
-i40e_rx_queue_config(struct i40e_rx_queue *rxq)
+i40e_rx_queue_config(struct ci_rx_queue *rxq)
 {
-	struct i40e_pf *pf = I40E_VSI_TO_PF(rxq->vsi);
-	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
+	struct i40e_pf *pf = I40E_VSI_TO_PF(rxq->i40e_vsi);
+	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->i40e_vsi);
 	struct rte_eth_dev_data *data = pf->dev_data;
 	uint16_t buf_size;
 
@@ -2988,11 +2988,11 @@ i40e_rx_queue_config(struct i40e_rx_queue *rxq)
 
 /* Init the RX queue in hardware */
 int
-i40e_rx_queue_init(struct i40e_rx_queue *rxq)
+i40e_rx_queue_init(struct ci_rx_queue *rxq)
 {
 	int err = I40E_SUCCESS;
-	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
-	struct rte_eth_dev_data *dev_data = I40E_VSI_TO_DEV_DATA(rxq->vsi);
+	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->i40e_vsi);
+	struct rte_eth_dev_data *dev_data = I40E_VSI_TO_DEV_DATA(rxq->i40e_vsi);
 	uint16_t pf_q = rxq->reg_idx;
 	uint16_t buf_size;
 	struct i40e_hmc_obj_rxq rx_ctx;
@@ -3166,7 +3166,7 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
 enum i40e_status_code
 i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz = NULL;
 	uint32_t ring_size;
 	struct rte_eth_dev *dev;
@@ -3180,7 +3180,7 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 
 	/* Allocate the RX queue data structure. */
 	rxq = rte_zmalloc_socket("i40e fdir rx queue",
-				  sizeof(struct i40e_rx_queue),
+				  sizeof(struct ci_rx_queue),
 				  RTE_CACHE_LINE_SIZE,
 				  SOCKET_ID_ANY);
 	if (!rxq) {
@@ -3206,11 +3206,11 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 	rxq->nb_rx_desc = I40E_FDIR_NUM_RX_DESC;
 	rxq->queue_id = I40E_FDIR_QUEUE_ID;
 	rxq->reg_idx = pf->fdir.fdir_vsi->base_queue;
-	rxq->vsi = pf->fdir.fdir_vsi;
+	rxq->i40e_vsi = pf->fdir.fdir_vsi;
 
 	rxq->rx_ring_phys_addr = rz->iova;
 	memset(rz->addr, 0, I40E_FDIR_NUM_RX_DESC * sizeof(union i40e_rx_desc));
-	rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
+	I40E_RX_RING(rxq) = (union i40e_rx_desc *)rz->addr;
 
 	/*
 	 * Don't need to allocate software ring and reset for the fdir
@@ -3226,7 +3226,7 @@ void
 i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -3264,7 +3264,7 @@ void
 i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 
@@ -3335,7 +3335,7 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 		}
 		if (ad->rx_vec_allowed) {
 			for (i = 0; i < dev->data->nb_rx_queues; i++) {
-				struct i40e_rx_queue *rxq =
+				struct ci_rx_queue *rxq =
 					dev->data->rx_queues[i];
 
 				if (rxq && i40e_rxq_vec_setup(rxq)) {
@@ -3438,10 +3438,10 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 			 dev->rx_pkt_burst == i40e_recv_pkts_vec_avx2);
 
 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
-			struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+			struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 
 			if (rxq)
-				rxq->rx_using_sse = rx_using_sse;
+				rxq->vector_rx = rx_using_sse;
 		}
 	}
 }
diff --git a/drivers/net/intel/i40e/i40e_rxtx.h b/drivers/net/intel/i40e/i40e_rxtx.h
index 2f32fc5686..4b5a84d8ef 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx.h
@@ -6,8 +6,9 @@
 #define _I40E_RXTX_H_
 
 #include "../common/tx.h"
+#include "../common/rx.h"
 
-#define RTE_PMD_I40E_RX_MAX_BURST 32
+#define RTE_PMD_I40E_RX_MAX_BURST CI_RX_MAX_BURST
 #define RTE_PMD_I40E_TX_MAX_BURST 32
 
 #define RTE_I40E_VPMD_RX_BURST        32
@@ -67,62 +68,19 @@ enum i40e_header_split_mode {
 			       I40E_HEADER_SPLIT_UDP_TCP | \
 			       I40E_HEADER_SPLIT_SCTP)
 
-/* HW desc structure, both 16-byte and 32-byte types are supported */
+/* HW desc structures, both 16-byte and 32-byte types are supported */
 #ifdef RTE_LIBRTE_I40E_16BYTE_RX_DESC
 #define i40e_rx_desc i40e_16byte_rx_desc
+#define I40E_RX_RING(rxq) \
+	((rxq)->i40e_rx_16b_ring)
 #else
 #define i40e_rx_desc i40e_32byte_rx_desc
+#define I40E_RX_RING(rxq) \
+	((rxq)->i40e_rx_32b_ring)
 #endif
 
-struct i40e_rx_entry {
-	struct rte_mbuf *mbuf;
-};
-
-/*
- * Structure associated with each RX queue.
- */
-struct i40e_rx_queue {
-	struct rte_mempool *mp; /**< mbuf pool to populate RX ring */
-	volatile union i40e_rx_desc *rx_ring;/**< RX ring virtual address */
-	uint64_t rx_ring_phys_addr; /**< RX ring DMA address */
-	struct i40e_rx_entry *sw_ring; /**< address of RX soft ring */
-	uint16_t nb_rx_desc; /**< number of RX descriptors */
-	uint16_t rx_free_thresh; /**< max free RX desc to hold */
-	uint16_t rx_tail; /**< current value of tail */
-	uint16_t nb_rx_hold; /**< number of held free RX desc */
-	struct rte_mbuf *pkt_first_seg; /**< first segment of current packet */
-	struct rte_mbuf *pkt_last_seg; /**< last segment of current packet */
-	struct rte_mbuf fake_mbuf; /**< dummy mbuf */
-#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
-	uint16_t rx_nb_avail; /**< number of staged packets ready */
-	uint16_t rx_next_avail; /**< index of next staged packets */
-	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	struct rte_mbuf *rx_stage[RTE_PMD_I40E_RX_MAX_BURST * 2];
-#endif
-
-	uint16_t rxrearm_nb;	/**< number of remaining to be re-armed */
-	uint16_t rxrearm_start;	/**< the idx we start the re-arming from */
-	uint64_t mbuf_initializer; /**< value to init mbufs */
-
-	uint16_t port_id; /**< device port ID */
-	uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise */
-	uint8_t fdir_enabled; /**< 0 if FDIR disabled, 1 when enabled */
-	uint16_t queue_id; /**< RX queue index */
-	uint16_t reg_idx; /**< RX queue register index */
-	uint8_t drop_en; /**< if not 0, set register bit */
-	volatile uint8_t *qrx_tail; /**< register address of tail */
-	struct i40e_vsi *vsi; /**< the VSI this queue belongs to */
-	uint16_t rx_buf_len; /* The packet buffer size */
-	uint16_t rx_hdr_len; /* The header buffer size */
-	uint16_t max_pkt_len; /* Maximum packet length */
-	uint8_t hs_mode; /* Header Split mode */
-	bool q_set; /**< indicate if rx queue has been configured */
-	bool rx_deferred_start; /**< don't start this queue in dev start */
-	uint16_t rx_using_sse; /**<flag indicate the usage of vPMD for rx */
-	uint8_t dcb_tc;         /**< Traffic class of rx queue */
-	uint64_t offloads; /**< Rx offload flags of RTE_ETH_RX_OFFLOAD_* */
-	const struct rte_memzone *mz;
-};
+#define I40E_RX_RING_PTR(rxq, entry) \
+	(I40E_RX_RING(rxq) + (entry))
 
 /** Offload features */
 union i40e_tx_offload {
@@ -172,16 +130,16 @@ uint16_t i40e_simple_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t i40e_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
 int i40e_tx_queue_init(struct ci_tx_queue *txq);
-int i40e_rx_queue_init(struct i40e_rx_queue *rxq);
+int i40e_rx_queue_init(struct ci_rx_queue *rxq);
 void i40e_free_tx_resources(struct ci_tx_queue *txq);
-void i40e_free_rx_resources(struct i40e_rx_queue *rxq);
+void i40e_free_rx_resources(struct ci_rx_queue *rxq);
 void i40e_dev_clear_queues(struct rte_eth_dev *dev);
 void i40e_dev_free_queues(struct rte_eth_dev *dev);
-void i40e_reset_rx_queue(struct i40e_rx_queue *rxq);
+void i40e_reset_rx_queue(struct ci_rx_queue *rxq);
 void i40e_reset_tx_queue(struct ci_tx_queue *txq);
 int i40e_tx_done_cleanup(void *txq, uint32_t free_cnt);
-int i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq);
-void i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq);
+int i40e_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq);
+void i40e_rx_queue_release_mbufs(struct ci_rx_queue *rxq);
 
 uint32_t i40e_dev_rx_queue_count(void *rx_queue);
 int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset);
@@ -197,9 +155,9 @@ uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue,
 				      struct rte_mbuf **rx_pkts,
 				      uint16_t nb_pkts);
 int i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);
-int i40e_rxq_vec_setup(struct i40e_rx_queue *rxq);
+int i40e_rxq_vec_setup(struct ci_rx_queue *rxq);
 int i40e_txq_vec_setup(struct ci_tx_queue *txq);
-void i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq);
+void i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq);
 uint16_t i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				   uint16_t nb_pkts);
 void i40e_set_rx_function(struct rte_eth_dev *dev);
diff --git a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
index b66a808f9f..fd9447014b 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
@@ -13,14 +13,14 @@
 
 #ifdef __AVX2__
 static __rte_always_inline void
-i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
+i40e_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
index 42beff6e89..3e4109e82e 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
@@ -16,13 +16,13 @@
 #include <rte_altivec.h>
 
 static inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
 
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 
 	__vector unsigned long hdr_room = (__vector unsigned long){
@@ -30,7 +30,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 						RTE_PKTMBUF_HEADROOM};
 	__vector unsigned long dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING(rxq) + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -195,16 +195,16 @@ desc_to_ptype_v(__vector unsigned long descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
 	__vector unsigned char shuf_msk;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	__vector unsigned short crc_adjust = (__vector unsigned short){
 		0, 0,         /* ignore pkt_type field */
@@ -221,7 +221,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -465,7 +465,7 @@ static uint16_t
 i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -611,15 +611,15 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+i40e_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
-	rxq->rx_using_sse = 1;
+	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
 }
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index 9c406e7a6f..0f3f7430aa 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -16,7 +16,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	i40e_rxq_rearm_common(rxq, false);
 }
@@ -105,16 +105,16 @@ desc_fdir_processing_32b(volatile union i40e_rx_desc *rxdp,
 
 /* Force inline as some compilers will not inline by default. */
 static __rte_always_inline uint16_t
-_recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec_avx2(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts, uint8_t *split_packet)
 {
 #define RTE_I40E_DESCS_PER_LOOP_AVX 8
 
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct i40e_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union i40e_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union i40e_rx_desc *rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 	rte_prefetch0(rxdp);
 
@@ -625,7 +625,7 @@ static uint16_t
 i40e_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index d8244556c0..f2292b45e8 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -18,7 +18,7 @@
 #define RTE_I40E_DESCS_PER_LOOP_AVX 8
 
 static __rte_always_inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	i40e_rxq_rearm_common(rxq, true);
 }
@@ -108,14 +108,14 @@ desc_fdir_processing_32b(volatile union i40e_rx_desc *rxdp,
 
 /* Force inline as some compilers will not inline by default. */
 static __rte_always_inline uint16_t
-_recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec_avx512(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			  uint16_t nb_pkts, uint8_t *split_packet)
 {
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct i40e_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union i40e_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union i40e_rx_desc *rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -693,7 +693,7 @@ i40e_recv_scattered_burst_vec_avx512(void *rx_queue,
 				     struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_common.h b/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
index ba72df8e13..d19b9e4bf4 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
@@ -21,7 +21,7 @@ i40e_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
 }
 
 static inline void
-_i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+_i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	const unsigned mask = rxq->nb_rx_desc - 1;
 	unsigned i;
@@ -68,7 +68,7 @@ i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
 	 */
 	ad->rx_vec_allowed = true;
 	for (uint16_t i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		if (!rxq)
 			continue;
 		if (!ci_rxq_vec_capable(rxq->nb_rx_desc, rxq->rx_free_thresh, rxq->offloads)) {
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index d16ceb6b5d..814aa666dc 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -17,18 +17,18 @@
 #include "i40e_rxtx_vec_common.h"
 
 static inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	uint64x2_t dma_addr0, dma_addr1;
 	uint64x2_t zero = vdupq_n_u64(0);
 	uint64_t paddr;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (unlikely(rte_mempool_get_bulk(rxq->mp,
@@ -203,7 +203,7 @@ descs_to_fdir_16b(uint32x4_t fltstat, uint64x2_t descs[4], struct rte_mbuf **rx_
 #endif
 
 static inline void
-desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
+desc_to_olflags_v(struct ci_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
 		  uint64x2_t descs[4], struct rte_mbuf **rx_pkts)
 {
 	uint32x4_t vlan0, vlan1, rss, l3_l4e;
@@ -332,15 +332,15 @@ desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **__rte_restrict rx_pkts,
  * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
+_recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		   struct rte_mbuf **__rte_restrict rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	/* mask to shuffle from desc. to mbuf */
 	uint8x16_t shuf_msk = {
@@ -374,7 +374,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch_non_temporal(rxdp);
 
@@ -592,7 +592,7 @@ i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
 
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -738,15 +738,15 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
 }
 
 void __rte_cold
-i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+i40e_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
-	rxq->rx_using_sse = 1;
+	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
 }
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 774519265b..74cd59e245 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -15,18 +15,18 @@
 #include <rte_vect.h>
 
 static inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
 	__m128i dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -207,7 +207,7 @@ descs_to_fdir_16b(__m128i fltstat, __m128i descs[4], struct rte_mbuf **rx_pkt)
 #endif
 
 static inline void
-desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
+desc_to_olflags_v(struct ci_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
 		  __m128i descs[4], struct rte_mbuf **rx_pkts)
 {
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -347,16 +347,16 @@ desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
 	__m128i shuf_msk;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	__m128i crc_adjust = _mm_set_epi16(
 				0, 0, 0,    /* ignore non-length fields */
@@ -382,7 +382,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -609,7 +609,7 @@ i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
 
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -755,15 +755,15 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+i40e_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
-	rxq->rx_using_sse = 1;
+	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
 }
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v1 05/13] net/ice: use the common Rx queue structure
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (2 preceding siblings ...)
  2025-05-06 13:27 ` [PATCH v1 04/13] net/i40e: use the " Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 06/13] net/iavf: " Anatoly Burakov
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Bruce Richardson

Make the ice driver use the new common Rx queue structure.

Because the ice driver supports both 16-byte and 32-byte descriptor
formats (controlled by the RTE_LIBRTE_ICE_16BYTE_RX_DESC define), the
common queue structure has to take that into account, so it carries
ring pointers for both descriptor formats, while the actual format is
picked by the ice driver at compile time using the above macro. Direct
accesses to the Rx ring are therefore replaced with macro accesses
that take the descriptor size into account.
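
As a sketch (assuming the ice accessors mirror the i40e macros added
earlier in this series; the actual definitions are in the ice_rxtx.h
hunk of this patch), the compile-time selection looks like:

	/* pick the ring member matching the descriptor size built in */
	#ifdef RTE_LIBRTE_ICE_16BYTE_RX_DESC
	#define ICE_RX_RING(rxq) \
		((rxq)->ice_rx_16b_ring)
	#else
	#define ICE_RX_RING(rxq) \
		((rxq)->ice_rx_32b_ring)
	#endif

	/* pointer to descriptor 'entry' within the selected ring */
	#define ICE_RX_RING_PTR(rxq, entry) \
		(ICE_RX_RING(rxq) + (entry))

so e.g. "rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);" resolves to the
correct descriptor type at build time.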

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h               |  22 +++
 drivers/net/intel/ice/ice_dcf.c             |   6 +-
 drivers/net/intel/ice/ice_dcf_ethdev.c      |  22 +--
 drivers/net/intel/ice/ice_ethdev.c          |   2 +-
 drivers/net/intel/ice/ice_ethdev.h          |   5 +-
 drivers/net/intel/ice/ice_rxtx.c            | 158 ++++++++++----------
 drivers/net/intel/ice/ice_rxtx.h            |  78 ++--------
 drivers/net/intel/ice/ice_rxtx_common_avx.h |   6 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx2.c   |  14 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx512.c |  16 +-
 drivers/net/intel/ice/ice_rxtx_vec_common.h |   6 +-
 drivers/net/intel/ice/ice_rxtx_vec_sse.c    |  22 +--
 12 files changed, 163 insertions(+), 194 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index db49db57d0..b4836e7914 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -12,6 +12,7 @@
 
 #define CI_RX_BURST 32
 #define CI_RX_MAX_BURST 32
+#define CI_RX_MAX_NSEG 2
 
 struct ci_rx_queue;
 
@@ -23,6 +24,8 @@ struct ci_rx_entry_sc {
 	struct rte_mbuf *fbuf; /* First segment of the fragmented packet.*/
 };
 
+typedef void (*ci_rx_release_mbufs_t)(struct ci_rx_queue *rxq);
+
 /**
  * Structure associated with each RX queue.
  */
@@ -32,6 +35,8 @@ struct ci_rx_queue {
 		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
 		volatile union i40e_16byte_rx_desc *i40e_rx_16b_ring;
 		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
+		volatile union ice_16b_rx_flex_desc *ice_rx_16b_ring;
+		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
@@ -64,10 +69,16 @@ struct ci_rx_queue {
 	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
 	uint64_t mbuf_initializer; /**< value to init mbufs */
 	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
+	uint32_t rxdid; /**< RX descriptor format ID. */
+	uint32_t proto_xtr; /* protocol extraction type */
+	uint64_t xtr_ol_flag; /* flexible descriptor metadata extraction offload flag */
+	off_t xtr_field_offs; /* Protocol extraction metadata offset */
+	uint64_t hw_time_update; /**< Last time HW timestamp was updated */
 	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
 	struct rte_mbuf fake_mbuf;
 	union { /* the VSI this queue belongs to */
 		struct i40e_vsi *i40e_vsi;
+		struct ice_vsi *ice_vsi;
 	};
 	const struct rte_memzone *mz;
 	union {
@@ -85,6 +96,17 @@ struct ci_rx_queue {
 			uint8_t hs_mode; /**< Header Split mode */
 			uint8_t dcb_tc; /**< Traffic class of rx queue */
 		};
+		struct { /* ice specific values */
+			ci_rx_release_mbufs_t rx_rel_mbufs; /**< release mbuf function */
+			/** holds buffer split information */
+			struct rte_eth_rxseg_split rxseg[CI_RX_MAX_NSEG];
+			struct ci_rx_entry *sw_split_buf; /**< Buffer split SW ring */
+			uint32_t rxseg_nb; /**< number of buffer split segments */
+			uint32_t time_high; /* high 32 bits of hardware timestamp register */
+			uint32_t hw_time_high; /* high 32 bits of timestamp */
+			uint32_t hw_time_low; /* low 32 bits of timestamp */
+			bool ts_enable; /* if rxq timestamp is enabled */
+		};
 	};
 };
 
diff --git a/drivers/net/intel/ice/ice_dcf.c b/drivers/net/intel/ice/ice_dcf.c
index 65c18921f4..fddf5bbde5 100644
--- a/drivers/net/intel/ice/ice_dcf.c
+++ b/drivers/net/intel/ice/ice_dcf.c
@@ -1175,8 +1175,8 @@ ice_dcf_init_rss(struct ice_dcf_hw *hw)
 int
 ice_dcf_configure_queues(struct ice_dcf_hw *hw)
 {
-	struct ice_rx_queue **rxq =
-		(struct ice_rx_queue **)hw->eth_dev->data->rx_queues;
+	struct ci_rx_queue **rxq =
+		(struct ci_rx_queue **)hw->eth_dev->data->rx_queues;
 	struct ci_tx_queue **txq =
 		(struct ci_tx_queue **)hw->eth_dev->data->tx_queues;
 	struct virtchnl_vsi_queue_config_info *vc_config;
@@ -1211,7 +1211,7 @@ ice_dcf_configure_queues(struct ice_dcf_hw *hw)
 
 		vc_qp->rxq.max_pkt_size = rxq[i]->max_pkt_len;
 		vc_qp->rxq.ring_len = rxq[i]->nb_rx_desc;
-		vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_dma;
+		vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_phys_addr;
 		vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len;
 
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
diff --git a/drivers/net/intel/ice/ice_dcf_ethdev.c b/drivers/net/intel/ice/ice_dcf_ethdev.c
index efff76afa8..53272ddd60 100644
--- a/drivers/net/intel/ice/ice_dcf_ethdev.c
+++ b/drivers/net/intel/ice/ice_dcf_ethdev.c
@@ -106,7 +106,7 @@ ice_dcf_xmit_pkts(__rte_unused void *tx_queue,
 }
 
 static int
-ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ice_rx_queue *rxq)
+ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ci_rx_queue *rxq)
 {
 	struct ice_dcf_adapter *dcf_ad = dev->data->dev_private;
 	struct rte_eth_dev_data *dev_data = dev->data;
@@ -145,8 +145,8 @@ ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ice_rx_queue *rxq)
 static int
 ice_dcf_init_rx_queues(struct rte_eth_dev *dev)
 {
-	struct ice_rx_queue **rxq =
-		(struct ice_rx_queue **)dev->data->rx_queues;
+	struct ci_rx_queue **rxq =
+		(struct ci_rx_queue **)dev->data->rx_queues;
 	int i, ret;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -282,7 +282,7 @@ ice_dcf_config_rx_queues_irqs(struct rte_eth_dev *dev,
 }
 
 static int
-alloc_rxq_mbufs(struct ice_rx_queue *rxq)
+alloc_rxq_mbufs(struct ci_rx_queue *rxq)
 {
 	volatile union ice_rx_flex_desc *rxd;
 	struct rte_mbuf *mbuf = NULL;
@@ -305,7 +305,7 @@ alloc_rxq_mbufs(struct ice_rx_queue *rxq)
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
-		rxd = &rxq->rx_ring[i];
+		rxd = ICE_RX_RING_PTR(rxq, i);
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
@@ -324,7 +324,7 @@ ice_dcf_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ice_dcf_adapter *ad = dev->data->dev_private;
 	struct iavf_hw *hw = &ad->real_hw.avf;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err = 0;
 
 	if (rx_queue_id >= dev->data->nb_rx_queues)
@@ -358,7 +358,7 @@ ice_dcf_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 }
 
 static inline void
-reset_rx_queue(struct ice_rx_queue *rxq)
+reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	uint16_t len;
 	uint32_t i;
@@ -369,7 +369,7 @@ reset_rx_queue(struct ice_rx_queue *rxq)
 	len = rxq->nb_rx_desc + ICE_RX_MAX_BURST;
 
 	for (i = 0; i < len * sizeof(union ice_rx_flex_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)ICE_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 
@@ -429,7 +429,7 @@ ice_dcf_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ice_dcf_adapter *ad = dev->data->dev_private;
 	struct ice_dcf_hw *hw = &ad->real_hw;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 
 	if (rx_queue_id >= dev->data->nb_rx_queues)
@@ -511,7 +511,7 @@ ice_dcf_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 static int
 ice_dcf_start_queues(struct rte_eth_dev *dev)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int nb_rxq = 0;
 	int nb_txq, i;
@@ -638,7 +638,7 @@ ice_dcf_stop_queues(struct rte_eth_dev *dev)
 {
 	struct ice_dcf_adapter *ad = dev->data->dev_private;
 	struct ice_dcf_hw *hw = &ad->real_hw;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int ret, i;
 
diff --git a/drivers/net/intel/ice/ice_ethdev.c b/drivers/net/intel/ice/ice_ethdev.c
index 2e163d706c..65cf586502 100644
--- a/drivers/net/intel/ice/ice_ethdev.c
+++ b/drivers/net/intel/ice/ice_ethdev.c
@@ -6690,7 +6690,7 @@ ice_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct ice_adapter *ad =
 			ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t ts_high;
 	uint64_t ts_ns;
 
diff --git a/drivers/net/intel/ice/ice_ethdev.h b/drivers/net/intel/ice/ice_ethdev.h
index afe8dae497..0ed223d83e 100644
--- a/drivers/net/intel/ice/ice_ethdev.h
+++ b/drivers/net/intel/ice/ice_ethdev.h
@@ -257,9 +257,6 @@ struct ice_vsi_list {
 	struct ice_vsi *vsi;
 };
 
-struct ice_rx_queue;
-struct ci_tx_queue;
-
 /**
  * Structure that defines a VSI, associated with a adapter.
  */
@@ -409,7 +406,7 @@ struct ice_fdir_counter_pool_container {
 struct ice_fdir_info {
 	struct ice_vsi *fdir_vsi;     /* pointer to fdir VSI structure */
 	struct ci_tx_queue *txq;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	void *prg_pkt;                 /* memory for fdir program packet */
 	uint64_t dma_addr;             /* physic address of packet memory*/
 	const struct rte_memzone *mz;
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 40ac01e782..4749ee729f 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -37,11 +37,11 @@ int
 ice_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = ICE_RX_RING_PTR(rxq, desc);
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.status_error0;
 
@@ -73,7 +73,7 @@ ice_proto_xtr_type_to_rxdid(uint8_t xtr_type)
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ci_rx_queue *rxq,
 				       struct rte_mbuf *mb,
 				       volatile union ice_rx_flex_desc *rxdp)
 {
@@ -95,7 +95,7 @@ ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ice_rx_queue *rxq,
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ci_rx_queue *rxq,
 				   struct rte_mbuf *mb,
 				   volatile union ice_rx_flex_desc *rxdp)
 {
@@ -120,7 +120,7 @@ ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ice_rx_queue *rxq,
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ci_rx_queue *rxq,
 				      struct rte_mbuf *mb,
 				      volatile union ice_rx_flex_desc *rxdp)
 {
@@ -164,7 +164,7 @@ ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ice_rx_queue *rxq,
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_aux_v2(struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_aux_v2(struct ci_rx_queue *rxq,
 				      struct rte_mbuf *mb,
 				      volatile union ice_rx_flex_desc *rxdp)
 {
@@ -215,7 +215,7 @@ static const ice_rxd_to_pkt_fields_t rxd_to_pkt_fields_ops[] = {
 };
 
 void
-ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq, uint32_t rxdid)
+ice_select_rxd_to_pkt_fields_handler(struct ci_rx_queue *rxq, uint32_t rxdid)
 {
 	rxq->rxdid = rxdid;
 
@@ -243,17 +243,17 @@ ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq, uint32_t rxdid)
 }
 
 static int
-ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
+ice_program_hw_rx_queue(struct ci_rx_queue *rxq)
 {
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
 	struct ice_pf *pf = ICE_VSI_TO_PF(vsi);
-	struct rte_eth_dev_data *dev_data = rxq->vsi->adapter->pf.dev_data;
+	struct rte_eth_dev_data *dev_data = rxq->ice_vsi->adapter->pf.dev_data;
 	struct ice_rlan_ctx rx_ctx;
 	uint16_t buf_size;
 	uint32_t rxdid = ICE_RXDID_COMMS_OVS;
 	uint32_t regval;
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 	uint32_t frame_size = dev_data->mtu + ICE_ETH_OVERHEAD;
 	int err;
 
@@ -371,7 +371,7 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
 		rx_ctx.dtype = 0; /* No Protocol Based Buffer Split mode */
 	}
 
-	rx_ctx.base = rxq->rx_ring_dma / ICE_QUEUE_BASE_ADDR_UNIT;
+	rx_ctx.base = rxq->rx_ring_phys_addr / ICE_QUEUE_BASE_ADDR_UNIT;
 	rx_ctx.qlen = rxq->nb_rx_desc;
 	rx_ctx.dbuf = rxq->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
 	rx_ctx.hbuf = rxq->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S;
@@ -452,15 +452,15 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
 
 /* Allocate mbufs for all descriptors in rx queue */
 static int
-ice_alloc_rx_queue_mbufs(struct ice_rx_queue *rxq)
+ice_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq)
 {
-	struct ice_rx_entry *rxe = rxq->sw_ring;
+	struct ci_rx_entry *rxe = rxq->sw_ring;
 	uint64_t dma_addr;
 	uint16_t i;
 
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
 		volatile union ice_rx_flex_desc *rxd;
-		rxd = &rxq->rx_ring[i];
+		rxd = ICE_RX_RING_PTR(rxq, i);
 		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mp);
 
 		if (unlikely(!mbuf)) {
@@ -514,7 +514,7 @@ ice_alloc_rx_queue_mbufs(struct ice_rx_queue *rxq)
 
 /* Free all mbufs for descriptors in rx queue */
 static void
-_ice_rx_queue_release_mbufs(struct ice_rx_queue *rxq)
+_ice_rx_queue_release_mbufs(struct ci_rx_queue *rxq)
 {
 	uint16_t i;
 
@@ -591,7 +591,7 @@ ice_switch_rx_queue(struct ice_hw *hw, uint16_t q_idx, bool on)
 }
 
 static inline int
-ice_check_rx_burst_bulk_alloc_preconditions(struct ice_rx_queue *rxq)
+ice_check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
 {
 	int ret = 0;
 
@@ -618,9 +618,9 @@ ice_check_rx_burst_bulk_alloc_preconditions(struct ice_rx_queue *rxq)
 	return ret;
 }
 
-/* reset fields in ice_rx_queue back to default */
+/* reset fields in ci_rx_queue back to default */
 static void
-ice_reset_rx_queue(struct ice_rx_queue *rxq)
+ice_reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	unsigned int i;
 	uint16_t len;
@@ -633,7 +633,7 @@ ice_reset_rx_queue(struct ice_rx_queue *rxq)
 	len = (uint16_t)(rxq->nb_rx_desc + ICE_RX_MAX_BURST);
 
 	for (i = 0; i < len * sizeof(union ice_rx_flex_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)ICE_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 	for (i = 0; i < ICE_RX_MAX_BURST; ++i)
@@ -655,7 +655,7 @@ ice_reset_rx_queue(struct ice_rx_queue *rxq)
 int
 ice_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -715,7 +715,7 @@ ice_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 int
 ice_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -834,9 +834,9 @@ ice_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 }
 
 static int
-ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
+ice_fdir_program_hw_rx_queue(struct ci_rx_queue *rxq)
 {
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
 	uint32_t rxdid = ICE_RXDID_LEGACY_1;
 	struct ice_rlan_ctx rx_ctx;
@@ -848,7 +848,7 @@ ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
 
 	memset(&rx_ctx, 0, sizeof(rx_ctx));
 
-	rx_ctx.base = rxq->rx_ring_dma / ICE_QUEUE_BASE_ADDR_UNIT;
+	rx_ctx.base = rxq->rx_ring_phys_addr / ICE_QUEUE_BASE_ADDR_UNIT;
 	rx_ctx.qlen = rxq->nb_rx_desc;
 	rx_ctx.dbuf = rxq->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
 	rx_ctx.hbuf = rxq->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S;
@@ -909,7 +909,7 @@ ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
 int
 ice_fdir_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
@@ -1099,7 +1099,7 @@ ice_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 int
 ice_fdir_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
@@ -1170,7 +1170,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 	struct ice_adapter *ad =
 		ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct ice_vsi *vsi = pf->main_vsi;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz;
 	uint32_t ring_size, tlen;
 	uint16_t len;
@@ -1206,7 +1206,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate the rx queue data structure */
 	rxq = rte_zmalloc_socket(NULL,
-				 sizeof(struct ice_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 socket_id);
 
@@ -1240,7 +1240,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 		rxq->crc_len = 0;
 
 	rxq->drop_en = rx_conf->rx_drop_en;
-	rxq->vsi = vsi;
+	rxq->ice_vsi = vsi;
 	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 	rxq->proto_xtr = pf->proto_xtr != NULL ?
 			 pf->proto_xtr[queue_idx] : PROTO_XTR_NONE;
@@ -1274,8 +1274,8 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 	/* Zero all the descriptors in the ring. */
 	memset(rz->addr, 0, ring_size);
 
-	rxq->rx_ring_dma = rz->iova;
-	rxq->rx_ring = rz->addr;
+	rxq->rx_ring_phys_addr = rz->iova;
+	ICE_RX_RING(rxq) = rz->addr;
 
 	/* always reserve more for bulk alloc */
 	len = (uint16_t)(nb_desc + ICE_RX_MAX_BURST);
@@ -1287,7 +1287,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate the software ring. */
 	rxq->sw_ring = rte_zmalloc_socket(NULL,
-					  sizeof(struct ice_rx_entry) * tlen,
+					  sizeof(struct ci_rx_entry) * tlen,
 					  RTE_CACHE_LINE_SIZE,
 					  socket_id);
 	if (!rxq->sw_ring) {
@@ -1324,7 +1324,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 void
 ice_rx_queue_release(void *rxq)
 {
-	struct ice_rx_queue *q = (struct ice_rx_queue *)rxq;
+	struct ci_rx_queue *q = (struct ci_rx_queue *)rxq;
 
 	if (!q) {
 		PMD_DRV_LOG(DEBUG, "Pointer to rxq is NULL");
@@ -1548,7 +1548,7 @@ void
 ice_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 		 struct rte_eth_rxq_info *qinfo)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -1586,11 +1586,11 @@ ice_rx_queue_count(void *rx_queue)
 {
 #define ICE_RXQ_SCAN_INTERVAL 4
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	while ((desc < rxq->nb_rx_desc) &&
 	       rte_le_to_cpu_16(rxdp->wb.status_error0) &
 	       (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)) {
@@ -1602,8 +1602,8 @@ ice_rx_queue_count(void *rx_queue)
 		desc += ICE_RXQ_SCAN_INTERVAL;
 		rxdp += ICE_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
-				 desc - rxq->nb_rx_desc]);
+			rxdp = ICE_RX_RING_PTR(rxq,
+					rxq->rx_tail + desc - rxq->nb_rx_desc);
 	}
 
 	return desc;
@@ -1695,25 +1695,25 @@ ice_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union ice_rx_flex_desc *rxdp)
 #define ICE_PTP_TS_VALID 0x1
 
 static inline int
-ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
+ice_rx_scan_hw_ring(struct ci_rx_queue *rxq)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t stat_err0;
 	uint16_t pkt_len, hdr_len;
 	int32_t s[ICE_LOOK_AHEAD], nb_dd;
 	int32_t i, j, nb_rx = 0;
 	uint64_t pkt_flags = 0;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 #endif
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -1843,7 +1843,7 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 }
 
 static inline uint16_t
-ice_rx_fill_from_stage(struct ice_rx_queue *rxq,
+ice_rx_fill_from_stage(struct ci_rx_queue *rxq,
 		       struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts)
 {
@@ -1862,10 +1862,10 @@ ice_rx_fill_from_stage(struct ice_rx_queue *rxq,
 }
 
 static inline int
-ice_rx_alloc_bufs(struct ice_rx_queue *rxq)
+ice_rx_alloc_bufs(struct ci_rx_queue *rxq)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx, i;
 	uint64_t dma_addr;
@@ -1894,7 +1894,7 @@ ice_rx_alloc_bufs(struct ice_rx_queue *rxq)
 		}
 	}
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = ICE_RX_RING_PTR(rxq, alloc_idx);
 	for (i = 0; i < rxq->rx_free_thresh; i++) {
 		if (likely(i < (rxq->rx_free_thresh - 1)))
 			/* Prefetch next mbuf */
@@ -1933,7 +1933,7 @@ ice_rx_alloc_bufs(struct ice_rx_queue *rxq)
 static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = (struct ice_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	uint16_t nb_rx = 0;
 
 	if (!nb_pkts)
@@ -1951,7 +1951,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		if (ice_rx_alloc_bufs(rxq) != 0) {
 			uint16_t i, j;
 
-			rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed +=
+			rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed +=
 				rxq->rx_free_thresh;
 			PMD_RX_LOG(DEBUG, "Rx mbuf alloc failed for "
 				   "port_id=%u, queue_id=%u",
@@ -2006,12 +2006,12 @@ ice_recv_scattered_pkts(void *rx_queue,
 			struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
-	volatile union ice_rx_flex_desc *rx_ring = rxq->rx_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ice_rx_flex_desc *rx_ring = ICE_RX_RING(rxq);
 	volatile union ice_rx_flex_desc *rxdp;
 	union ice_rx_flex_desc rxd;
-	struct ice_rx_entry *sw_ring = rxq->sw_ring;
-	struct ice_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb; /* new allocated mbuf */
@@ -2023,13 +2023,13 @@ ice_recv_scattered_pkts(void *rx_queue,
 	uint16_t rx_stat_err0;
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -2050,7 +2050,7 @@ ice_recv_scattered_pkts(void *rx_queue,
 		/* allocate mbuf */
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
+			rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
 			break;
 		}
 		rxd = *rxdp; /* copy descriptor in ring to temp variable*/
@@ -2319,7 +2319,7 @@ int
 ice_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint32_t desc;
 
 	if (unlikely(offset >= rxq->nb_rx_desc))
@@ -2332,7 +2332,7 @@ ice_rx_descriptor_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = ICE_RX_RING_PTR(rxq, desc);
 	if (rte_le_to_cpu_16(rxdp->wb.status_error0) &
 	    (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S))
 		return RTE_ETH_RX_DESC_DONE;
@@ -2459,7 +2459,7 @@ ice_fdir_setup_tx_resources(struct ice_pf *pf)
 int
 ice_fdir_setup_rx_resources(struct ice_pf *pf)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz = NULL;
 	uint32_t ring_size;
 	struct rte_eth_dev *dev;
@@ -2473,7 +2473,7 @@ ice_fdir_setup_rx_resources(struct ice_pf *pf)
 
 	/* Allocate the RX queue data structure. */
 	rxq = rte_zmalloc_socket("ice fdir rx queue",
-				 sizeof(struct ice_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 SOCKET_ID_ANY);
 	if (!rxq) {
@@ -2499,12 +2499,12 @@ ice_fdir_setup_rx_resources(struct ice_pf *pf)
 	rxq->nb_rx_desc = ICE_FDIR_NUM_RX_DESC;
 	rxq->queue_id = ICE_FDIR_QUEUE_ID;
 	rxq->reg_idx = pf->fdir.fdir_vsi->base_queue;
-	rxq->vsi = pf->fdir.fdir_vsi;
+	rxq->ice_vsi = pf->fdir.fdir_vsi;
 
-	rxq->rx_ring_dma = rz->iova;
+	rxq->rx_ring_phys_addr = rz->iova;
 	memset(rz->addr, 0, ICE_FDIR_NUM_RX_DESC *
 	       sizeof(union ice_32byte_rx_desc));
-	rxq->rx_ring = (union ice_rx_flex_desc *)rz->addr;
+	ICE_RX_RING(rxq) = (union ice_rx_flex_desc *)rz->addr;
 
 	/*
 	 * Don't need to allocate software ring and reset for the fdir
@@ -2523,12 +2523,12 @@ ice_recv_pkts(void *rx_queue,
 	      struct rte_mbuf **rx_pkts,
 	      uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
-	volatile union ice_rx_flex_desc *rx_ring = rxq->rx_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ice_rx_flex_desc *rx_ring = ICE_RX_RING(rxq);
 	volatile union ice_rx_flex_desc *rxdp;
 	union ice_rx_flex_desc rxd;
-	struct ice_rx_entry *sw_ring = rxq->sw_ring;
-	struct ice_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *nmb; /* new allocated mbuf */
 	struct rte_mbuf *nmb_pay; /* new allocated payload mbuf */
 	struct rte_mbuf *rxm; /* pointer to store old mbuf in SW ring */
@@ -2540,13 +2540,13 @@ ice_recv_pkts(void *rx_queue,
 	uint16_t rx_stat_err0;
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -2567,7 +2567,7 @@ ice_recv_pkts(void *rx_queue,
 		/* allocate header mbuf */
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
+			rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
 			break;
 		}
 
@@ -2594,7 +2594,7 @@ ice_recv_pkts(void *rx_queue,
 			/* allocate payload mbuf */
 			nmb_pay = rte_mbuf_raw_alloc(rxq->rxseg[1].mp);
 			if (unlikely(!nmb_pay)) {
-				rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
+				rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
 				rxe->mbuf = NULL;
 				nb_hold--;
 				if (unlikely(rx_id == 0))
@@ -3472,7 +3472,7 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 	struct ice_adapter *ad =
 		ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 #ifdef RTE_ARCH_X86
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int i;
 	int rx_check_ret = -1;
 
@@ -4634,7 +4634,7 @@ ice_set_default_ptype_table(struct rte_eth_dev *dev)
  * tx queue
  */
 static inline int
-ice_check_fdir_programming_status(struct ice_rx_queue *rxq)
+ice_check_fdir_programming_status(struct ci_rx_queue *rxq)
 {
 	volatile union ice_32byte_rx_desc *rxdp;
 	uint64_t qword1;
@@ -4644,7 +4644,7 @@ ice_check_fdir_programming_status(struct ice_rx_queue *rxq)
 	int ret = -EAGAIN;
 
 	rxdp = (volatile union ice_32byte_rx_desc *)
-		(&rxq->rx_ring[rxq->rx_tail]);
+			ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
 	rx_status = (qword1 & ICE_RXD_QW1_STATUS_M)
 			>> ICE_RXD_QW1_STATUS_S;
@@ -4689,7 +4689,7 @@ int
 ice_fdir_programming(struct ice_pf *pf, struct ice_fltr_desc *fdir_desc)
 {
 	struct ci_tx_queue *txq = pf->fdir.txq;
-	struct ice_rx_queue *rxq = pf->fdir.rxq;
+	struct ci_rx_queue *rxq = pf->fdir.rxq;
 	volatile struct ice_fltr_desc *fdirdp;
 	volatile struct ice_tx_desc *txdp;
 	uint32_t td_cmd;
diff --git a/drivers/net/intel/ice/ice_rxtx.h b/drivers/net/intel/ice/ice_rxtx.h
index 276d40b57f..1a39770d7d 100644
--- a/drivers/net/intel/ice/ice_rxtx.h
+++ b/drivers/net/intel/ice/ice_rxtx.h
@@ -5,6 +5,7 @@
 #ifndef _ICE_RXTX_H_
 #define _ICE_RXTX_H_
 
+#include "../common/rx.h"
 #include "../common/tx.h"
 #include "ice_ethdev.h"
 
@@ -14,21 +15,28 @@
 #define ICE_DMA_MEM_ALIGN    4096
 #define ICE_RING_BASE_ALIGN  128
 
-#define ICE_RX_MAX_BURST 32
+#define ICE_RX_MAX_BURST CI_RX_MAX_BURST
 #define ICE_TX_MAX_BURST 32
 
 /* Maximal number of segments to split. */
-#define ICE_RX_MAX_NSEG 2
+#define ICE_RX_MAX_NSEG CI_RX_MAX_NSEG
 
 #define ICE_CHK_Q_ENA_COUNT        100
 #define ICE_CHK_Q_ENA_INTERVAL_US  100
 
 #ifdef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 #define ice_rx_flex_desc ice_16b_rx_flex_desc
+#define ICE_RX_RING(rxq) \
+	((rxq)->ice_rx_16b_ring)
 #else
 #define ice_rx_flex_desc ice_32b_rx_flex_desc
+#define ICE_RX_RING(rxq) \
+	((rxq)->ice_rx_32b_ring)
 #endif
 
+#define ICE_RX_RING_PTR(rxq, entry) \
+	(ICE_RX_RING(rxq) + (entry))
+
 #define ICE_SUPPORT_CHAIN_NUM 5
 
 #define ICE_TD_CMD                      ICE_TX_DESC_CMD_EOP
@@ -78,74 +86,16 @@ extern int ice_timestamp_dynfield_offset;
 
 #define ICE_TX_MTU_SEG_MAX	8
 
-typedef void (*ice_rx_release_mbufs_t)(struct ice_rx_queue *rxq);
-typedef void (*ice_rxd_to_pkt_fields_t)(struct ice_rx_queue *rxq,
+typedef void (*ice_rxd_to_pkt_fields_t)(struct ci_rx_queue *rxq,
 					struct rte_mbuf *mb,
 					volatile union ice_rx_flex_desc *rxdp);
 
-struct ice_rx_entry {
-	struct rte_mbuf *mbuf;
-};
-
 enum ice_rx_dtype {
 	ICE_RX_DTYPE_NO_SPLIT       = 0,
 	ICE_RX_DTYPE_HEADER_SPLIT   = 1,
 	ICE_RX_DTYPE_SPLIT_ALWAYS   = 2,
 };
 
-struct ice_rx_queue {
-	struct rte_mempool *mp; /* mbuf pool to populate RX ring */
-	volatile union ice_rx_flex_desc *rx_ring;/* RX ring virtual address */
-	rte_iova_t rx_ring_dma; /* RX ring DMA address */
-	struct ice_rx_entry *sw_ring; /* address of RX soft ring */
-	uint16_t nb_rx_desc; /* number of RX descriptors */
-	uint16_t rx_free_thresh; /* max free RX desc to hold */
-	uint16_t rx_tail; /* current value of tail */
-	uint16_t nb_rx_hold; /* number of held free RX desc */
-	struct rte_mbuf *pkt_first_seg; /**< first segment of current packet */
-	struct rte_mbuf *pkt_last_seg; /**< last segment of current packet */
-	uint16_t rx_nb_avail; /**< number of staged packets ready */
-	uint16_t rx_next_avail; /**< index of next staged packets */
-	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	struct rte_mbuf fake_mbuf; /**< dummy mbuf */
-	struct rte_mbuf *rx_stage[ICE_RX_MAX_BURST * 2];
-
-	uint16_t rxrearm_nb;	/**< number of remaining to be re-armed */
-	uint16_t rxrearm_start;	/**< the idx we start the re-arming from */
-	uint64_t mbuf_initializer; /**< value to init mbufs */
-
-	uint16_t port_id; /* device port ID */
-	uint8_t crc_len; /* 0 if CRC stripped, 4 otherwise */
-	uint8_t fdir_enabled; /* 0 if FDIR disabled, 1 when enabled */
-	uint16_t queue_id; /* RX queue index */
-	uint16_t reg_idx; /* RX queue register index */
-	uint8_t drop_en; /* if not 0, set register bit */
-	volatile uint8_t *qrx_tail; /* register address of tail */
-	struct ice_vsi *vsi; /* the VSI this queue belongs to */
-	uint16_t rx_buf_len; /* The packet buffer size */
-	uint16_t rx_hdr_len; /* The header buffer size */
-	uint16_t max_pkt_len; /* Maximum packet length */
-	bool q_set; /* indicate if rx queue has been configured */
-	bool rx_deferred_start; /* don't start this queue in dev start */
-	uint8_t proto_xtr; /* Protocol extraction from flexible descriptor */
-	int xtr_field_offs; /*Protocol extraction matedata offset*/
-	uint64_t xtr_ol_flag; /* Protocol extraction offload flag */
-	uint32_t rxdid; /* Receive Flex Descriptor profile ID */
-	ice_rx_release_mbufs_t rx_rel_mbufs;
-	uint64_t offloads;
-	uint32_t time_high;
-	uint32_t hw_register_set;
-	const struct rte_memzone *mz;
-	uint32_t hw_time_high; /* high 32 bits of timestamp */
-	uint32_t hw_time_low; /* low 32 bits of timestamp */
-	uint64_t hw_time_update; /* SW time of HW record updating */
-	struct ice_rx_entry *sw_split_buf;
-	/* address of temp buffer for RX split mbufs */
-	struct rte_eth_rxseg_split rxseg[ICE_RX_MAX_NSEG];
-	uint32_t rxseg_nb;
-	bool ts_enable; /* if rxq timestamp is enabled */
-};
-
 /* Offload features */
 union ice_tx_offload {
 	uint64_t data;
@@ -249,12 +199,12 @@ int ice_tx_descriptor_status(void *tx_queue, uint16_t offset);
 void ice_set_default_ptype_table(struct rte_eth_dev *dev);
 const uint32_t *ice_dev_supported_ptypes_get(struct rte_eth_dev *dev,
 					     size_t *no_of_elements);
-void ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq,
+void ice_select_rxd_to_pkt_fields_handler(struct ci_rx_queue *rxq,
 					  uint32_t rxdid);
 
 int ice_rx_vec_dev_check(struct rte_eth_dev *dev);
 int ice_tx_vec_dev_check(struct rte_eth_dev *dev);
-int ice_rxq_vec_setup(struct ice_rx_queue *rxq);
+int ice_rxq_vec_setup(struct ci_rx_queue *rxq);
 int ice_txq_vec_setup(struct ci_tx_queue *txq);
 uint16_t ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   uint16_t nb_pkts);
@@ -299,7 +249,7 @@ int ice_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc);
 #define FDIR_PARSING_ENABLE_PER_QUEUE(ad, on) do { \
 	int i; \
 	for (i = 0; i < (ad)->pf.dev_data->nb_rx_queues; i++) { \
-		struct ice_rx_queue *rxq = (ad)->pf.dev_data->rx_queues[i]; \
+		struct ci_rx_queue *rxq = (ad)->pf.dev_data->rx_queues[i]; \
 		if (!rxq) \
 			continue; \
 		rxq->fdir_enabled = on; \
diff --git a/drivers/net/intel/ice/ice_rxtx_common_avx.h b/drivers/net/intel/ice/ice_rxtx_common_avx.h
index c62e60c70e..7209c902db 100644
--- a/drivers/net/intel/ice/ice_rxtx_common_avx.h
+++ b/drivers/net/intel/ice/ice_rxtx_common_avx.h
@@ -9,14 +9,14 @@
 
 #ifdef __AVX2__
 static __rte_always_inline void
-ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
+ice_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
index 0c54b325c6..f4555369a2 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
@@ -8,7 +8,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-ice_rxq_rearm(struct ice_rx_queue *rxq)
+ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	ice_rxq_rearm_common(rxq, false);
 }
@@ -33,17 +33,17 @@ ice_flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_ice_recv_raw_pkts_vec_avx2(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts, uint8_t *split_packet,
 			    bool offload)
 {
 #define ICE_DESCS_PER_LOOP_AVX 8
 
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct ice_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union ice_rx_flex_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ice_rx_flex_desc *rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 
 	rte_prefetch0(rxdp);
@@ -445,7 +445,7 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			 * needs to load 2nd 16B of each desc for RSS hash parsing,
 			 * will cause performance drop to get into this context.
 			 */
-			if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+			if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
 					RTE_ETH_RX_OFFLOAD_RSS_HASH) {
 				/* load bottom half of every 32B desc */
 				const __m128i raw_desc_bh7 = _mm_load_si128
@@ -694,7 +694,7 @@ static __rte_always_inline uint16_t
 ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				  uint16_t nb_pkts, bool offload)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
index bd49be07c9..6eea74d703 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
@@ -10,7 +10,7 @@
 #define ICE_DESCS_PER_LOOP_AVX 8
 
 static __rte_always_inline void
-ice_rxq_rearm(struct ice_rx_queue *rxq)
+ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	ice_rxq_rearm_common(rxq, true);
 }
@@ -35,17 +35,17 @@ ice_flex_rxd_to_fdir_flags_vec_avx512(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
+_ice_recv_raw_pkts_vec_avx512(struct ci_rx_queue *rxq,
 			      struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts,
 			      uint8_t *split_packet,
 			      bool do_offload)
 {
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct ice_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union ice_rx_flex_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ice_rx_flex_desc *rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -467,7 +467,7 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
 			 * needs to load 2nd 16B of each desc for RSS hash parsing,
 			 * will cause performance drop to get into this context.
 			 */
-			if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+			if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
 					RTE_ETH_RX_OFFLOAD_RSS_HASH) {
 				/* load bottom half of every 32B desc */
 				const __m128i raw_desc_bh7 = _mm_load_si128
@@ -723,7 +723,7 @@ static uint16_t
 ice_recv_scattered_burst_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				    uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -765,7 +765,7 @@ ice_recv_scattered_burst_vec_avx512_offload(void *rx_queue,
 					    struct rte_mbuf **rx_pkts,
 					    uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_common.h b/drivers/net/intel/ice/ice_rxtx_vec_common.h
index 7933c26366..9430a99ba5 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_common.h
+++ b/drivers/net/intel/ice/ice_rxtx_vec_common.h
@@ -17,7 +17,7 @@ ice_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
 }
 
 static inline void
-_ice_rx_queue_release_mbufs_vec(struct ice_rx_queue *rxq)
+_ice_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	const unsigned int mask = rxq->nb_rx_desc - 1;
 	unsigned int i;
@@ -79,7 +79,7 @@ _ice_rx_queue_release_mbufs_vec(struct ice_rx_queue *rxq)
 #define ICE_VECTOR_OFFLOAD_PATH	1
 
 static inline int
-ice_rx_vec_queue_default(struct ice_rx_queue *rxq)
+ice_rx_vec_queue_default(struct ci_rx_queue *rxq)
 {
 	if (!rxq)
 		return -1;
@@ -119,7 +119,7 @@ static inline int
 ice_rx_vec_dev_check_default(struct rte_eth_dev *dev)
 {
 	int i;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int ret = 0;
 	int result = 0;
 
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_sse.c b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
index 97f05ba45e..dc9d37226a 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
@@ -26,18 +26,18 @@ ice_flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 }
 
 static inline void
-ice_rxq_rearm(struct ice_rx_queue *rxq)
+ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 					  RTE_PKTMBUF_HEADROOM);
 	__m128i dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -105,7 +105,7 @@ ice_rxq_rearm(struct ice_rx_queue *rxq)
 }
 
 static inline void
-ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
+ice_rx_desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4],
 			 struct rte_mbuf **rx_pkts)
 {
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -301,15 +301,15 @@ ice_rx_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a ICE_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_ice_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 	__m128i crc_adjust = _mm_set_epi16
 				(0, 0, 0,       /* ignore non-length fields */
 				 -rxq->crc_len, /* sub crc on data_len */
@@ -361,7 +361,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -482,7 +482,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		 * needs to load 2nd 16B of each desc for RSS hash parsing,
 		 * will cause performance drop to get into this context.
 		 */
-		if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+		if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
 				RTE_ETH_RX_OFFLOAD_RSS_HASH) {
 			/* load bottom half of every 32B desc */
 			const __m128i raw_desc_bh3 =
@@ -608,7 +608,7 @@ static uint16_t
 ice_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -779,7 +779,7 @@ ice_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 int __rte_cold
-ice_rxq_vec_setup(struct ice_rx_queue *rxq)
+ice_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	if (!rxq)
 		return -1;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v1 06/13] net/iavf: use the common Rx queue structure
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (3 preceding siblings ...)
  2025-05-06 13:27 ` [PATCH v1 05/13] net/ice: " Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
                   ` (9 subsequent siblings)
  14 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Bruce Richardson, Vladimir Medvedkin, Ian Stokes

Make the iavf driver use the new common Rx queue structure.

Because the iavf driver supports both 16-byte and 32-byte descriptor
formats (controlled by the RTE_LIBRTE_IAVF_16BYTE_RX_DESC define), the
common queue structure has to take that into account, so it carries ring
pointers for both formats, while the actual descriptor format is picked
by iavf at compile time via that define. Direct access to the Rx
descriptor ring is therefore replaced with macro-based access that takes
the descriptor size into account.
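
To illustrate the idea (this is a minimal standalone sketch, not the
actual driver code): a union holds ring pointers for both descriptor
sizes, and a compile-time macro selects the member that matches the
configured format. All type, macro, and field names below are simplified
stand-ins introduced only for this illustration.

#include <stdint.h>
#include <stdio.h>

struct desc_16b { uint64_t qw[2]; };	/* stand-in for a 16-byte descriptor */
struct desc_32b { uint64_t qw[4]; };	/* stand-in for a 32-byte descriptor */

struct rx_queue {
	union {				/* only the configured member is used */
		volatile struct desc_16b *ring_16b;
		volatile struct desc_32b *ring_32b;
	};
	uint16_t rx_tail;
};

/* pick the ring member at compile time */
#ifdef USE_16BYTE_RX_DESC
#define RX_RING(q)	((q)->ring_16b)
#else
#define RX_RING(q)	((q)->ring_32b)
#endif
#define RX_RING_PTR(q, i)	(RX_RING(q) + (i))

int main(void)
{
	static struct desc_32b ring[4];
	struct rx_queue q = { .ring_32b = ring, .rx_tail = 2 };

	/* callers never touch the union members directly */
	printf("descriptor size: %zu bytes\n",
	       sizeof(*RX_RING_PTR(&q, q.rx_tail)));
	return 0;
}

In the patch below, IAVF_RX_RING() and IAVF_RX_RING_PTR() play this role,
with the member selected by RTE_LIBRTE_IAVF_16BYTE_RX_DESC.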

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h                 |  12 ++
 drivers/net/intel/iavf/iavf.h                 |   4 +-
 drivers/net/intel/iavf/iavf_ethdev.c          |  12 +-
 drivers/net/intel/iavf/iavf_rxtx.c            | 192 +++++++++---------
 drivers/net/intel/iavf/iavf_rxtx.h            |  76 ++-----
 drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   |  24 +--
 drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c |  22 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_common.h |  27 ++-
 drivers/net/intel/iavf/iavf_rxtx_vec_neon.c   |  12 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    |  46 ++---
 drivers/net/intel/iavf/iavf_vchnl.c           |   6 +-
 11 files changed, 198 insertions(+), 235 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index b4836e7914..507235f4c6 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -37,6 +37,8 @@ struct ci_rx_queue {
 		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
 		volatile union ice_16b_rx_flex_desc *ice_rx_16b_ring;
 		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
+		volatile union iavf_16byte_rx_desc *iavf_rx_16b_ring;
+		volatile union iavf_32byte_rx_desc *iavf_rx_32b_ring;
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
@@ -79,6 +81,7 @@ struct ci_rx_queue {
 	union { /* the VSI this queue belongs to */
 		struct i40e_vsi *i40e_vsi;
 		struct ice_vsi *ice_vsi;
+		struct iavf_vsi *iavf_vsi;
 	};
 	const struct rte_memzone *mz;
 	union {
@@ -107,6 +110,15 @@ struct ci_rx_queue {
 			uint32_t hw_time_low; /* low 32 bits of timestamp */
 			bool ts_enable; /* if rxq timestamp is enabled */
 		};
+		struct { /* iavf specific values */
+			const struct iavf_rxq_ops *ops; /**< queue ops */
+			struct iavf_rx_queue_stats *stats; /**< per-queue stats */
+			uint64_t phc_time; /**< HW timestamp */
+			uint8_t rel_mbufs_type; /**< type of release mbuf function */
+			uint8_t rx_flags; /**< Rx VLAN tag location flags */
+#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG1     BIT(0)
+#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2   BIT(1)
+		};
 	};
 };
 
diff --git a/drivers/net/intel/iavf/iavf.h b/drivers/net/intel/iavf/iavf.h
index 97e6b243fb..f81c939c96 100644
--- a/drivers/net/intel/iavf/iavf.h
+++ b/drivers/net/intel/iavf/iavf.h
@@ -97,7 +97,7 @@
 #define IAVF_L2TPV2_FLAGS_LEN	0x4000
 
 struct iavf_adapter;
-struct iavf_rx_queue;
+struct ci_rx_queue;
 struct ci_tx_queue;
 
 
@@ -555,7 +555,7 @@ int iavf_ipsec_crypto_request(struct iavf_adapter *adapter,
 		uint8_t *resp_msg, size_t resp_msg_len);
 extern const struct rte_tm_ops iavf_tm_ops;
 int iavf_get_ptp_cap(struct iavf_adapter *adapter);
-int iavf_get_phc_time(struct iavf_rx_queue *rxq);
+int iavf_get_phc_time(struct ci_rx_queue *rxq);
 int iavf_flow_sub(struct iavf_adapter *adapter,
 		  struct iavf_fsub_conf *filter);
 int iavf_flow_unsub(struct iavf_adapter *adapter,
diff --git a/drivers/net/intel/iavf/iavf_ethdev.c b/drivers/net/intel/iavf/iavf_ethdev.c
index 5babd587b3..4e843a3532 100644
--- a/drivers/net/intel/iavf/iavf_ethdev.c
+++ b/drivers/net/intel/iavf/iavf_ethdev.c
@@ -728,7 +728,7 @@ iavf_dev_configure(struct rte_eth_dev *dev)
 }
 
 static int
-iavf_init_rxq(struct rte_eth_dev *dev, struct iavf_rx_queue *rxq)
+iavf_init_rxq(struct rte_eth_dev *dev, struct ci_rx_queue *rxq)
 {
 	struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct rte_eth_dev_data *dev_data = dev->data;
@@ -779,8 +779,8 @@ iavf_init_rxq(struct rte_eth_dev *dev, struct iavf_rx_queue *rxq)
 static int
 iavf_init_queues(struct rte_eth_dev *dev)
 {
-	struct iavf_rx_queue **rxq =
-		(struct iavf_rx_queue **)dev->data->rx_queues;
+	struct ci_rx_queue **rxq =
+		(struct ci_rx_queue **)dev->data->rx_queues;
 	int i, ret = IAVF_SUCCESS;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -955,7 +955,7 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev,
 static int
 iavf_start_queues(struct rte_eth_dev *dev)
 {
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int i;
 	uint16_t nb_txq, nb_rxq;
@@ -1867,9 +1867,9 @@ iavf_dev_update_ipsec_xstats(struct rte_eth_dev *ethdev,
 {
 	uint16_t idx;
 	for (idx = 0; idx < ethdev->data->nb_rx_queues; idx++) {
-		struct iavf_rx_queue *rxq;
+		struct ci_rx_queue *rxq;
 		struct iavf_ipsec_crypto_stats *stats;
-		rxq = (struct iavf_rx_queue *)ethdev->data->rx_queues[idx];
+		rxq = (struct ci_rx_queue *)ethdev->data->rx_queues[idx];
 		stats = &rxq->stats->ipsec_crypto;
 		ips->icount += stats->icount;
 		ips->ibytes += stats->ibytes;
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index d23d2df807..a9ce4b55d9 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -128,12 +128,12 @@ iavf_monitor_callback(const uint64_t value,
 int
 iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile union iavf_rx_desc *rxdp;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = IAVF_RX_RING_PTR(rxq, desc);
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.qword1.status_error_len;
 
@@ -222,7 +222,7 @@ check_tx_vec_allow(struct ci_tx_queue *txq)
 }
 
 static inline bool
-check_rx_bulk_allow(struct iavf_rx_queue *rxq)
+check_rx_bulk_allow(struct ci_rx_queue *rxq)
 {
 	int ret = true;
 
@@ -243,7 +243,7 @@ check_rx_bulk_allow(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-reset_rx_queue(struct iavf_rx_queue *rxq)
+reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	uint16_t len;
 	uint32_t i;
@@ -254,12 +254,12 @@ reset_rx_queue(struct iavf_rx_queue *rxq)
 	len = rxq->nb_rx_desc + IAVF_RX_MAX_BURST;
 
 	for (i = 0; i < len * sizeof(union iavf_rx_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)IAVF_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 
 	for (i = 0; i < IAVF_RX_MAX_BURST; i++)
-		rxq->sw_ring[rxq->nb_rx_desc + i] = &rxq->fake_mbuf;
+		rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;
 
 	/* for rx bulk */
 	rxq->rx_nb_avail = 0;
@@ -315,7 +315,7 @@ reset_tx_queue(struct ci_tx_queue *txq)
 }
 
 static int
-alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
+alloc_rxq_mbufs(struct ci_rx_queue *rxq)
 {
 	volatile union iavf_rx_desc *rxd;
 	struct rte_mbuf *mbuf = NULL;
@@ -326,8 +326,8 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		mbuf = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!mbuf)) {
 			for (j = 0; j < i; j++) {
-				rte_pktmbuf_free_seg(rxq->sw_ring[j]);
-				rxq->sw_ring[j] = NULL;
+				rte_pktmbuf_free_seg(rxq->sw_ring[j].mbuf);
+				rxq->sw_ring[j].mbuf = NULL;
 			}
 			PMD_DRV_LOG(ERR, "Failed to allocate mbuf for RX");
 			return -ENOMEM;
@@ -342,7 +342,7 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
-		rxd = &rxq->rx_ring[i];
+		rxd = IAVF_RX_RING_PTR(rxq, i);
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
@@ -350,14 +350,14 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		rxd->read.rsvd2 = 0;
 #endif
 
-		rxq->sw_ring[i] = mbuf;
+		rxq->sw_ring[i].mbuf = mbuf;
 	}
 
 	return 0;
 }
 
 static inline void
-release_rxq_mbufs(struct iavf_rx_queue *rxq)
+release_rxq_mbufs(struct ci_rx_queue *rxq)
 {
 	uint16_t i;
 
@@ -365,9 +365,9 @@ release_rxq_mbufs(struct iavf_rx_queue *rxq)
 		return;
 
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
-		if (rxq->sw_ring[i]) {
-			rte_pktmbuf_free_seg(rxq->sw_ring[i]);
-			rxq->sw_ring[i] = NULL;
+		if (rxq->sw_ring[i].mbuf) {
+			rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
+			rxq->sw_ring[i].mbuf = NULL;
 		}
 	}
 
@@ -395,7 +395,7 @@ struct iavf_rxq_ops iavf_rxq_release_mbufs_ops[] = {
 };
 
 static inline void
-iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct iavf_rx_queue *rxq,
+iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ci_rx_queue *rxq,
 				    struct rte_mbuf *mb,
 				    volatile union iavf_rx_flex_desc *rxdp)
 {
@@ -420,7 +420,7 @@ iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct iavf_rx_queue *rxq,
 }
 
 static inline void
-iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct iavf_rx_queue *rxq,
+iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct ci_rx_queue *rxq,
 				       struct rte_mbuf *mb,
 				       volatile union iavf_rx_flex_desc *rxdp)
 {
@@ -462,7 +462,7 @@ iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct iavf_rx_queue *rxq,
 }
 
 static inline void
-iavf_rxd_to_pkt_fields_by_comms_aux_v2(struct iavf_rx_queue *rxq,
+iavf_rxd_to_pkt_fields_by_comms_aux_v2(struct ci_rx_queue *rxq,
 				       struct rte_mbuf *mb,
 				       volatile union iavf_rx_flex_desc *rxdp)
 {
@@ -517,7 +517,7 @@ iavf_rxd_to_pkt_fields_t rxd_to_pkt_fields_ops[IAVF_RXDID_LAST + 1] = {
 };
 
 static void
-iavf_select_rxd_to_pkt_fields_handler(struct iavf_rx_queue *rxq, uint32_t rxdid)
+iavf_select_rxd_to_pkt_fields_handler(struct ci_rx_queue *rxq, uint32_t rxdid)
 {
 	rxq->rxdid = rxdid;
 
@@ -572,7 +572,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	struct iavf_info *vf =
 		IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 	struct iavf_vsi *vsi = &vf->vsi;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *mz;
 	uint32_t ring_size;
 	uint8_t proto_xtr;
@@ -610,7 +610,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 
 	/* Allocate the rx queue data structure */
 	rxq = rte_zmalloc_socket("iavf rxq",
-				 sizeof(struct iavf_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 socket_id);
 	if (!rxq) {
@@ -668,7 +668,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	rxq->port_id = dev->data->port_id;
 	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 	rxq->rx_hdr_len = 0;
-	rxq->vsi = vsi;
+	rxq->iavf_vsi = vsi;
 	rxq->offloads = offloads;
 
 	if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
@@ -713,7 +713,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	/* Zero all the descriptors in the ring. */
 	memset(mz->addr, 0, ring_size);
 	rxq->rx_ring_phys_addr = mz->iova;
-	rxq->rx_ring = (union iavf_rx_desc *)mz->addr;
+	IAVF_RX_RING(rxq) = (union iavf_rx_desc *)mz->addr;
 
 	rxq->mz = mz;
 	reset_rx_queue(rxq);
@@ -905,7 +905,7 @@ iavf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 	struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err = 0;
 
 	PMD_DRV_FUNC_TRACE();
@@ -997,7 +997,7 @@ iavf_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	struct iavf_adapter *adapter =
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 
 	PMD_DRV_FUNC_TRACE();
@@ -1060,7 +1060,7 @@ iavf_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 void
 iavf_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 {
-	struct iavf_rx_queue *q = dev->data->rx_queues[qid];
+	struct ci_rx_queue *q = dev->data->rx_queues[qid];
 
 	if (!q)
 		return;
@@ -1089,7 +1089,7 @@ iavf_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 static void
 iavf_reset_queues(struct rte_eth_dev *dev)
 {
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int i;
 
@@ -1375,7 +1375,7 @@ iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
  * from the hardware point of view.
  */
 static inline void
-iavf_update_rx_tail(struct iavf_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
+iavf_update_rx_tail(struct ci_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
 {
 	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
 
@@ -1397,9 +1397,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	volatile union iavf_rx_desc *rx_ring;
 	volatile union iavf_rx_desc *rxdp;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	union iavf_rx_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_eth_dev *dev;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
@@ -1416,8 +1416,8 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
-	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	rx_ring = IAVF_RX_RING(rxq);
+	ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -1442,13 +1442,13 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (unlikely(rx_id == rxq->nb_rx_desc))
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1456,9 +1456,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 		rxdp->read.hdr_addr = 0;
@@ -1506,9 +1506,9 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 {
 	volatile union iavf_rx_desc *rx_ring;
 	volatile union iavf_rx_flex_desc *rxdp;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	union iavf_rx_flex_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_eth_dev *dev;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
@@ -1525,8 +1525,8 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
-	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	rx_ring = IAVF_RX_RING(rxq);
+	ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -1559,13 +1559,13 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (unlikely(rx_id == rxq->nb_rx_desc))
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1573,9 +1573,9 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 		rxdp->read.hdr_addr = 0;
@@ -1629,9 +1629,9 @@ uint16_t
 iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 				  uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	union iavf_rx_flex_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb, *rxm;
@@ -1643,9 +1643,9 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint64_t pkt_flags;
 	uint64_t ts_ns;
 
-	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union iavf_rx_desc *rx_ring = IAVF_RX_RING(rxq);
 	volatile union iavf_rx_flex_desc *rxdp;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -1678,13 +1678,13 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (rx_id == rxq->nb_rx_desc)
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1692,10 +1692,10 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
 
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 
@@ -1806,9 +1806,9 @@ uint16_t
 iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	union iavf_rx_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb, *rxm;
@@ -1820,9 +1820,9 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
 
-	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union iavf_rx_desc *rx_ring = IAVF_RX_RING(rxq);
 	volatile union iavf_rx_desc *rxdp;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -1847,13 +1847,13 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (rx_id == rxq->nb_rx_desc)
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1861,10 +1861,10 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
 
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 
@@ -1963,12 +1963,12 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 #define IAVF_LOOK_AHEAD 8
 static inline int
-iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
+iavf_rx_scan_hw_ring_flex_rxd(struct ci_rx_queue *rxq,
 			    struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts)
 {
 	volatile union iavf_rx_flex_desc *rxdp;
-	struct rte_mbuf **rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t stat_err0;
 	uint16_t pkt_len;
@@ -1976,10 +1976,10 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 	int32_t i, j, nb_rx = 0;
 	int32_t nb_staged = 0;
 	uint64_t pkt_flags;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 	uint64_t ts_ns;
 
-	rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail];
+	rxdp = (volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -2038,7 +2038,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 					  rxq->rx_tail +
 					  i * IAVF_LOOK_AHEAD + j);
 
-			mb = rxep[j];
+			mb = rxep[j].mbuf;
 			pkt_len = (rte_le_to_cpu_16(rxdp[j].wb.pkt_len) &
 				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
 			mb->data_len = pkt_len;
@@ -2072,11 +2072,11 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 
 			/* Put up to nb_pkts directly into buffers */
 			if ((i + j) < nb_pkts) {
-				rx_pkts[i + j] = rxep[j];
+				rx_pkts[i + j] = rxep[j].mbuf;
 				nb_rx++;
 			} else {
 				/* Stage excess pkts received */
-				rxq->rx_stage[nb_staged] = rxep[j];
+				rxq->rx_stage[nb_staged] = rxep[j].mbuf;
 				nb_staged++;
 			}
 		}
@@ -2090,16 +2090,16 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 
 	/* Clear software ring entries */
 	for (i = 0; i < (nb_rx + nb_staged); i++)
-		rxq->sw_ring[rxq->rx_tail + i] = NULL;
+		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
 
 	return nb_rx;
 }
 
 static inline int
-iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+iavf_rx_scan_hw_ring(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t pkt_len;
 	uint64_t qword1;
@@ -2108,9 +2108,9 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 	int32_t i, j, nb_rx = 0;
 	int32_t nb_staged = 0;
 	uint64_t pkt_flags;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
@@ -2164,7 +2164,7 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 			IAVF_DUMP_RX_DESC(rxq, &rxdp[j],
 					 rxq->rx_tail + i * IAVF_LOOK_AHEAD + j);
 
-			mb = rxep[j];
+			mb = rxep[j].mbuf;
 			qword1 = rte_le_to_cpu_64
 					(rxdp[j].wb.qword1.status_error_len);
 			pkt_len = ((qword1 & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >>
@@ -2190,10 +2190,10 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 
 			/* Put up to nb_pkts directly into buffers */
 			if ((i + j) < nb_pkts) {
-				rx_pkts[i + j] = rxep[j];
+				rx_pkts[i + j] = rxep[j].mbuf;
 				nb_rx++;
 			} else { /* Stage excess pkts received */
-				rxq->rx_stage[nb_staged] = rxep[j];
+				rxq->rx_stage[nb_staged] = rxep[j].mbuf;
 				nb_staged++;
 			}
 		}
@@ -2207,13 +2207,13 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 
 	/* Clear software ring entries */
 	for (i = 0; i < (nb_rx + nb_staged); i++)
-		rxq->sw_ring[rxq->rx_tail + i] = NULL;
+		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
 
 	return nb_rx;
 }
 
 static inline uint16_t
-iavf_rx_fill_from_stage(struct iavf_rx_queue *rxq,
+iavf_rx_fill_from_stage(struct ci_rx_queue *rxq,
 		       struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts)
 {
@@ -2232,10 +2232,10 @@ iavf_rx_fill_from_stage(struct iavf_rx_queue *rxq,
 }
 
 static inline int
-iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
+iavf_rx_alloc_bufs(struct ci_rx_queue *rxq)
 {
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx, i;
 	uint64_t dma_addr;
@@ -2252,13 +2252,13 @@ iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
 		return -ENOMEM;
 	}
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = IAVF_RX_RING_PTR(rxq, alloc_idx);
 	for (i = 0; i < rxq->rx_free_thresh; i++) {
 		if (likely(i < (rxq->rx_free_thresh - 1)))
 			/* Prefetch next mbuf */
-			rte_prefetch0(rxep[i + 1]);
+			rte_prefetch0(rxep[i + 1].mbuf);
 
-		mb = rxep[i];
+		mb = rxep[i].mbuf;
 		rte_mbuf_refcnt_set(mb, 1);
 		mb->next = NULL;
 		mb->data_off = RTE_PKTMBUF_HEADROOM;
@@ -2284,7 +2284,7 @@ iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
 static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = (struct iavf_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	uint16_t nb_rx = 0;
 
 	if (!nb_pkts)
@@ -2312,11 +2312,11 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 			rxq->rx_tail = (uint16_t)(rxq->rx_tail - (nb_rx + nb_staged));
 			for (i = 0, j = rxq->rx_tail; i < nb_rx; i++, j++) {
-				rxq->sw_ring[j] = rx_pkts[i];
+				rxq->sw_ring[j].mbuf = rx_pkts[i];
 				rx_pkts[i] = NULL;
 			}
 			for (i = 0, j = rxq->rx_tail + nb_rx; i < nb_staged; i++, j++) {
-				rxq->sw_ring[j] = rxq->rx_stage[i];
+				rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
 				rx_pkts[i] = NULL;
 			}
 
@@ -3843,13 +3843,13 @@ static uint16_t
 iavf_recv_pkts_no_poll(void *rx_queue, struct rte_mbuf **rx_pkts,
 				uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	enum iavf_rx_burst_type rx_burst_type;
 
-	if (!rxq->vsi || rxq->vsi->adapter->no_poll)
+	if (!rxq->iavf_vsi || rxq->iavf_vsi->adapter->no_poll)
 		return 0;
 
-	rx_burst_type = rxq->vsi->adapter->rx_burst_type;
+	rx_burst_type = rxq->iavf_vsi->adapter->rx_burst_type;
 
 	return iavf_rx_pkt_burst_ops[rx_burst_type].pkt_burst(rx_queue,
 								rx_pkts, nb_pkts);
@@ -3965,7 +3965,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	enum iavf_rx_burst_type rx_burst_type;
 	int no_poll_on_link_down = adapter->devargs.no_poll_on_link_down;
 	int i;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	bool use_flex = true;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -4379,7 +4379,7 @@ void
 iavf_dev_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 		     struct rte_eth_rxq_info *qinfo)
 {
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -4414,11 +4414,11 @@ iavf_dev_rxq_count(void *rx_queue)
 {
 #define IAVF_RXQ_SCAN_INTERVAL 4
 	volatile union iavf_rx_desc *rxdp;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	while ((desc < rxq->nb_rx_desc) &&
 	       ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
@@ -4431,8 +4431,8 @@ iavf_dev_rxq_count(void *rx_queue)
 		desc += IAVF_RXQ_SCAN_INTERVAL;
 		rxdp += IAVF_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
-					desc - rxq->nb_rx_desc]);
+			rxdp = IAVF_RX_RING_PTR(rxq,
+					rxq->rx_tail + desc - rxq->nb_rx_desc);
 	}
 
 	return desc;
@@ -4441,7 +4441,7 @@ iavf_dev_rxq_count(void *rx_queue)
 int
 iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile uint64_t *status;
 	uint64_t mask;
 	uint32_t desc;
@@ -4456,7 +4456,7 @@ iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	status = &rxq->rx_ring[desc].wb.qword1.status_error_len;
+	status = &IAVF_RX_RING_PTR(rxq, desc)->wb.qword1.status_error_len;
 	mask = rte_le_to_cpu_64((1ULL << IAVF_RX_DESC_STATUS_DD_SHIFT)
 		<< IAVF_RXD_QW1_STATUS_SHIFT);
 	if (*status & mask)
diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index 62b5a67c84..c43ddc3c2f 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -17,7 +17,7 @@
 #define IAVF_RING_BASE_ALIGN      128
 
 /* used for Rx Bulk Allocate */
-#define IAVF_RX_MAX_BURST         32
+#define IAVF_RX_MAX_BURST         CI_RX_MAX_BURST
 
 /* Max data buffer size must be 16K - 128 bytes */
 #define IAVF_RX_MAX_DATA_BUF_SIZE (16 * 1024 - 128)
@@ -198,17 +198,24 @@ union iavf_32b_rx_flex_desc {
 #ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 #define iavf_rx_desc iavf_16byte_rx_desc
 #define iavf_rx_flex_desc iavf_16b_rx_flex_desc
+#define IAVF_RX_RING(rxq) \
+	((rxq)->iavf_rx_16b_ring)
 #else
 #define iavf_rx_desc iavf_32byte_rx_desc
 #define iavf_rx_flex_desc iavf_32b_rx_flex_desc
+#define IAVF_RX_RING(rxq) \
+	((rxq)->iavf_rx_32b_ring)
 #endif
 
-typedef void (*iavf_rxd_to_pkt_fields_t)(struct iavf_rx_queue *rxq,
+#define IAVF_RX_RING_PTR(rxq, entry) \
+	(IAVF_RX_RING(rxq) + (entry))
+
+typedef void (*iavf_rxd_to_pkt_fields_t)(struct ci_rx_queue *rxq,
 				struct rte_mbuf *mb,
 				volatile union iavf_rx_flex_desc *rxdp);
 
 struct iavf_rxq_ops {
-	void (*release_mbufs)(struct iavf_rx_queue *rxq);
+	void (*release_mbufs)(struct ci_rx_queue *rxq);
 };
 
 struct iavf_txq_ops {
@@ -221,59 +228,6 @@ struct iavf_rx_queue_stats {
 	struct iavf_ipsec_crypto_stats ipsec_crypto;
 };
 
-/* Structure associated with each Rx queue. */
-struct iavf_rx_queue {
-	struct rte_mempool *mp;       /* mbuf pool to populate Rx ring */
-	const struct rte_memzone *mz; /* memzone for Rx ring */
-	volatile union iavf_rx_desc *rx_ring; /* Rx ring virtual address */
-	uint64_t rx_ring_phys_addr;   /* Rx ring DMA address */
-	struct rte_mbuf **sw_ring;     /* address of SW ring */
-	uint16_t nb_rx_desc;          /* ring length */
-	uint16_t rx_tail;             /* current value of tail */
-	volatile uint8_t *qrx_tail;   /* register address of tail */
-	uint16_t rx_free_thresh;      /* max free RX desc to hold */
-	uint16_t nb_rx_hold;          /* number of held free RX desc */
-	struct rte_mbuf *pkt_first_seg; /* first segment of current packet */
-	struct rte_mbuf *pkt_last_seg;  /* last segment of current packet */
-	struct rte_mbuf fake_mbuf;      /* dummy mbuf */
-	uint8_t rxdid;
-	uint8_t rel_mbufs_type;
-
-	/* used for VPMD */
-	uint16_t rxrearm_nb;       /* number of remaining to be re-armed */
-	uint16_t rxrearm_start;    /* the idx we start the re-arming from */
-	uint64_t mbuf_initializer; /* value to init mbufs */
-
-	/* for rx bulk */
-	uint16_t rx_nb_avail;      /* number of staged packets ready */
-	uint16_t rx_next_avail;    /* index of next staged packets */
-	uint16_t rx_free_trigger;  /* triggers rx buffer allocation */
-	struct rte_mbuf *rx_stage[IAVF_RX_MAX_BURST * 2]; /* store mbuf */
-
-	uint16_t port_id;        /* device port ID */
-	uint8_t crc_len;        /* 0 if CRC stripped, 4 otherwise */
-	uint8_t fdir_enabled;   /* 0 if FDIR disabled, 1 when enabled */
-	uint16_t queue_id;      /* Rx queue index */
-	uint16_t rx_buf_len;    /* The packet buffer size */
-	uint16_t rx_hdr_len;    /* The header buffer size */
-	uint16_t max_pkt_len;   /* Maximum packet length */
-	struct iavf_vsi *vsi; /**< the VSI this queue belongs to */
-
-	bool q_set;             /* if rx queue has been configured */
-	bool rx_deferred_start; /* don't start this queue in dev start */
-	const struct iavf_rxq_ops *ops;
-	uint8_t rx_flags;
-#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG1     BIT(0)
-#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2   BIT(1)
-	uint8_t proto_xtr; /* protocol extraction type */
-	uint64_t xtr_ol_flag;
-		/* flexible descriptor metadata extraction offload flag */
-	struct iavf_rx_queue_stats *stats;
-	uint64_t offloads;
-	uint64_t phc_time;
-	uint64_t hw_time_update;
-};
-
 /* Offload features */
 union iavf_tx_offload {
 	uint64_t data;
@@ -691,7 +645,7 @@ uint16_t iavf_xmit_pkts_vec_avx2_offload(void *tx_queue, struct rte_mbuf **tx_pk
 int iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc);
 int iavf_rx_vec_dev_check(struct rte_eth_dev *dev);
 int iavf_tx_vec_dev_check(struct rte_eth_dev *dev);
-int iavf_rxq_vec_setup(struct iavf_rx_queue *rxq);
+int iavf_rxq_vec_setup(struct ci_rx_queue *rxq);
 int iavf_txq_vec_setup(struct ci_tx_queue *txq);
 uint16_t iavf_recv_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				   uint16_t nb_pkts);
@@ -731,12 +685,12 @@ uint8_t iavf_proto_xtr_type_to_rxdid(uint8_t xtr_type);
 
 void iavf_set_default_ptype_table(struct rte_eth_dev *dev);
 void iavf_tx_queue_release_mbufs_avx512(struct ci_tx_queue *txq);
-void iavf_rx_queue_release_mbufs_sse(struct iavf_rx_queue *rxq);
+void iavf_rx_queue_release_mbufs_sse(struct ci_rx_queue *rxq);
 void iavf_tx_queue_release_mbufs_sse(struct ci_tx_queue *txq);
-void iavf_rx_queue_release_mbufs_neon(struct iavf_rx_queue *rxq);
+void iavf_rx_queue_release_mbufs_neon(struct ci_rx_queue *rxq);
 
 static inline
-void iavf_dump_rx_descriptor(struct iavf_rx_queue *rxq,
+void iavf_dump_rx_descriptor(struct ci_rx_queue *rxq,
 			    const volatile void *desc,
 			    uint16_t rx_id)
 {
@@ -794,7 +748,7 @@ void iavf_dump_tx_descriptor(const struct ci_tx_queue *txq,
 #define FDIR_PROC_ENABLE_PER_QUEUE(ad, on) do { \
 	int i; \
 	for (i = 0; i < (ad)->dev_data->nb_rx_queues; i++) { \
-		struct iavf_rx_queue *rxq = (ad)->dev_data->rx_queues[i]; \
+		struct ci_rx_queue *rxq = (ad)->dev_data->rx_queues[i]; \
 		if (!rxq) \
 			continue; \
 		rxq->fdir_enabled = on; \
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
index 88e35dc3e9..f51fa4acf9 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
@@ -7,7 +7,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	iavf_rxq_rearm_common(rxq, false);
 }
@@ -15,21 +15,19 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 #define PKTLEN_SHIFT     10
 
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx2(struct ci_rx_queue *rxq,
 			     struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts, uint8_t *split_packet,
 			     bool offload)
 {
 #define IAVF_DESCS_PER_LOOP_AVX 8
 
-	/* const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; */
-	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *type_table = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	/* struct iavf_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail]; */
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union iavf_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union iavf_rx_desc *rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 
 	rte_prefetch0(rxdp);
@@ -487,14 +485,14 @@ flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct ci_rx_queue *rxq,
 				      struct rte_mbuf **rx_pkts,
 				      uint16_t nb_pkts, uint8_t *split_packet,
 				      bool offload)
 {
 #define IAVF_DESCS_PER_LOOP_AVX 8
 
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
@@ -503,9 +501,9 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
 	volatile union iavf_rx_flex_desc *rxdp =
-		(volatile union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+		(volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -1476,7 +1474,7 @@ static __rte_always_inline uint16_t
 iavf_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				   uint16_t nb_pkts, bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
@@ -1565,7 +1563,7 @@ iavf_recv_scattered_burst_vec_avx2_flex_rxd(void *rx_queue,
 					    struct rte_mbuf **rx_pkts,
 					    uint16_t nb_pkts, bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
index f2af028bef..80495f33cd 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
@@ -28,26 +28,26 @@
 #define IAVF_RX_TS_OFFLOAD
 
 static __rte_always_inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	iavf_rxq_rearm_common(rxq, true);
 }
 
 #define IAVF_RX_LEN_MASK 0x80808080
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx512(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx512(struct ci_rx_queue *rxq,
 			       struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts, uint8_t *split_packet,
 			       bool offload)
 {
 #ifdef IAVF_RX_PTYPE_OFFLOAD
-	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *type_table = rxq->iavf_vsi->adapter->ptype_tbl;
 #endif
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0, 0,
 						    rxq->mbuf_initializer);
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union iavf_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union iavf_rx_desc *rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -578,13 +578,13 @@ flex_rxd_to_fdir_flags_vec_avx512(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct ci_rx_queue *rxq,
 					struct rte_mbuf **rx_pkts,
 					uint16_t nb_pkts,
 					uint8_t *split_packet,
 					bool offload)
 {
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
 #endif
@@ -594,9 +594,9 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0, 0,
 						    rxq->mbuf_initializer);
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
 	volatile union iavf_rx_flex_desc *rxdp =
-		(volatile union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+		(volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -1653,7 +1653,7 @@ static __rte_always_inline uint16_t
 iavf_recv_scattered_burst_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts, bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
@@ -1729,7 +1729,7 @@ iavf_recv_scattered_burst_vec_avx512_flex_rxd(void *rx_queue,
 					      uint16_t nb_pkts,
 					      bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
index 38e9a206d9..f0a7d19b6a 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
@@ -8,7 +8,6 @@
 #include <ethdev_driver.h>
 #include <rte_malloc.h>
 
-#include "../common/rx.h"
 #include "iavf.h"
 #include "iavf_rxtx.h"
 
@@ -21,7 +20,7 @@ iavf_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
 }
 
 static inline void
-_iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
+_iavf_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	const unsigned int mask = rxq->nb_rx_desc - 1;
 	unsigned int i;
@@ -32,15 +31,15 @@ _iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
 	/* free all mbufs that are valid in the ring */
 	if (rxq->rxrearm_nb == 0) {
 		for (i = 0; i < rxq->nb_rx_desc; i++) {
-			if (rxq->sw_ring[i])
-				rte_pktmbuf_free_seg(rxq->sw_ring[i]);
+			if (rxq->sw_ring[i].mbuf)
+				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
 		}
 	} else {
 		for (i = rxq->rx_tail;
 		     i != rxq->rxrearm_start;
 		     i = (i + 1) & mask) {
-			if (rxq->sw_ring[i])
-				rte_pktmbuf_free_seg(rxq->sw_ring[i]);
+			if (rxq->sw_ring[i].mbuf)
+				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
 		}
 	}
 
@@ -51,7 +50,7 @@ _iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
 }
 
 static inline int
-iavf_rx_vec_queue_default(struct iavf_rx_queue *rxq)
+iavf_rx_vec_queue_default(struct ci_rx_queue *rxq)
 {
 	if (!rxq)
 		return -1;
@@ -117,7 +116,7 @@ static inline int
 iavf_rx_vec_dev_check_default(struct rte_eth_dev *dev)
 {
 	int i;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int ret;
 	int result = 0;
 
@@ -240,14 +239,14 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 
 #ifdef RTE_ARCH_X86
 static __rte_always_inline void
-iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
+iavf_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -259,7 +258,7 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 
 			dma_addr0 = _mm_setzero_si128();
 			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i] = &rxq->fake_mbuf;
+				rxp[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
@@ -278,8 +277,8 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
 		__m128i vaddr0, vaddr1;
 
-		mb0 = rxp[0];
-		mb1 = rxp[1];
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
 
 		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
 		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
index a583340f15..e1c8f3c7f9 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
@@ -15,7 +15,7 @@
 #include "iavf_rxtx_vec_common.h"
 
 static inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
@@ -75,7 +75,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-desc_to_olflags_v(struct iavf_rx_queue *rxq, volatile union iavf_rx_desc *rxdp,
+desc_to_olflags_v(struct ci_rx_queue *rxq, volatile union iavf_rx_desc *rxdp,
 		  uint64x2_t descs[4], struct rte_mbuf **rx_pkts)
 {
 	RTE_SET_USED(rxdp);
@@ -193,7 +193,7 @@ desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **__rte_restrict rx_pkts,
  * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct iavf_rx_queue *__rte_restrict rxq,
+_recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		   struct rte_mbuf **__rte_restrict rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
@@ -203,7 +203,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *__rte_restrict rxq,
 	struct rte_mbuf **sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	/* mask to shuffle from desc. to mbuf */
 	uint8x16_t shuf_msk = {
@@ -394,13 +394,13 @@ iavf_recv_pkts_vec(void *__rte_restrict rx_queue,
 }
 
 void __rte_cold
-iavf_rx_queue_release_mbufs_neon(struct iavf_rx_queue *rxq)
+iavf_rx_queue_release_mbufs_neon(struct ci_rx_queue *rxq)
 {
 	_iavf_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-iavf_rxq_vec_setup(struct iavf_rx_queue *rxq)
+iavf_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->rel_mbufs_type = IAVF_REL_MBUFS_NEON_VEC;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
index 2e41079e88..f18dfd636c 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
@@ -13,19 +13,19 @@
 #include <rte_vect.h>
 
 static inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
 	__m128i dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp, (void *)rxp,
@@ -33,7 +33,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 		if (rxq->rxrearm_nb + rxq->rx_free_thresh >= rxq->nb_rx_desc) {
 			dma_addr0 = _mm_setzero_si128();
 			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i] = &rxq->fake_mbuf;
+				rxp[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
@@ -47,8 +47,8 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 	for (i = 0; i < rxq->rx_free_thresh; i += 2, rxp += 2) {
 		__m128i vaddr0, vaddr1;
 
-		mb0 = rxp[0];
-		mb1 = rxp[1];
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
 
 		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
 		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
@@ -88,7 +88,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4],
 		  struct rte_mbuf **rx_pkts)
 {
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -206,11 +206,11 @@ flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 static inline void
-flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4], __m128i descs_bh[4],
+flex_desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4], __m128i descs_bh[4],
 		       struct rte_mbuf **rx_pkts)
 #else
 static inline void
-flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+flex_desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4],
 		       struct rte_mbuf **rx_pkts)
 #endif
 {
@@ -466,16 +466,16 @@ flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
 	__m128i shuf_msk;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	__m128i crc_adjust = _mm_set_epi16(
 				0, 0, 0,    /* ignore non-length fields */
@@ -500,7 +500,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -571,7 +571,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 #endif
 
 		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
-		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos].mbuf);
 		/* Read desc statuses backwards to avoid race condition */
 		/* A.1 load desc[3] */
 		descs[3] = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, rxdp + 3));
@@ -714,16 +714,16 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
+_recv_raw_pkts_vec_flex_rxd(struct ci_rx_queue *rxq,
 			    struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union iavf_rx_flex_desc *rxdp;
-	struct rte_mbuf **sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
 #endif
@@ -779,7 +779,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = (volatile union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+	rxdp = (volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -857,7 +857,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 #endif
 
 		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
-		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos].mbuf);
 		/* Read desc statuses backwards to avoid race condition */
 		/* A.1 load desc[3] */
 		descs[3] = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, rxdp + 3));
@@ -1207,7 +1207,7 @@ static uint16_t
 iavf_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 	unsigned int i = 0;
 
@@ -1276,7 +1276,7 @@ iavf_recv_scattered_burst_vec_flex_rxd(void *rx_queue,
 				       struct rte_mbuf **rx_pkts,
 				       uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 	unsigned int i = 0;
 
@@ -1449,7 +1449,7 @@ iavf_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-iavf_rx_queue_release_mbufs_sse(struct iavf_rx_queue *rxq)
+iavf_rx_queue_release_mbufs_sse(struct ci_rx_queue *rxq)
 {
 	_iavf_rx_queue_release_mbufs_vec(rxq);
 }
@@ -1462,7 +1462,7 @@ iavf_txq_vec_setup(struct ci_tx_queue *txq)
 }
 
 int __rte_cold
-iavf_rxq_vec_setup(struct iavf_rx_queue *rxq)
+iavf_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->rel_mbufs_type = IAVF_REL_MBUFS_SSE_VEC;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
diff --git a/drivers/net/intel/iavf/iavf_vchnl.c b/drivers/net/intel/iavf/iavf_vchnl.c
index 6feca8435e..9f8bb07726 100644
--- a/drivers/net/intel/iavf/iavf_vchnl.c
+++ b/drivers/net/intel/iavf/iavf_vchnl.c
@@ -1218,7 +1218,7 @@ int
 iavf_configure_queues(struct iavf_adapter *adapter,
 		uint16_t num_queue_pairs, uint16_t index)
 {
-	struct iavf_rx_queue **rxq = (struct iavf_rx_queue **)adapter->dev_data->rx_queues;
+	struct ci_rx_queue **rxq = (struct ci_rx_queue **)adapter->dev_data->rx_queues;
 	struct ci_tx_queue **txq = (struct ci_tx_queue **)adapter->dev_data->tx_queues;
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
 	struct virtchnl_vsi_queue_config_info *vc_config;
@@ -2258,9 +2258,9 @@ iavf_get_ptp_cap(struct iavf_adapter *adapter)
 }
 
 int
-iavf_get_phc_time(struct iavf_rx_queue *rxq)
+iavf_get_phc_time(struct ci_rx_queue *rxq)
 {
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
 	struct virtchnl_phc_time phc_time;
 	struct iavf_cmd_info args;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v1 07/13] net/intel: generalize vectorized Rx rearm
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (4 preceding siblings ...)
  2025-05-06 13:27 ` [PATCH v1 06/13] net/iavf: " Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
                   ` (8 subsequent siblings)
  14 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Bruce Richardson

There is a certain amount of duplication between various drivers when it
comes to Rx ring rearm. This patch takes the implementation from the ice
driver as a base, because it supports both the no-IOVA-in-mbuf case and all
vector implementations, and moves it to a common file.

The driver Rx rearm code used copious amounts of #ifdef-ery to discriminate
between 16- and 32-byte descriptor support, but we cannot do that in the
common code because the driver-specific definitions are not visible there.
Instead, we rely on compile-time constant propagation and force-inlining to
ensure that the compiler generates effectively the same code it generated
back when the implementation lived in the driver. We also add a compile-time
enum for x86 vectorization levels to discriminate between different
instruction sets. This too is constant-propagated, and thus should not
affect performance.
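
To illustrate the intent, here is a minimal sketch (not part of the diff
below) of how a driver wrapper is expected to specialize the common rearm
path. Because ci_rxq_rearm() is force-inlined and takes the descriptor
length and vector level as compile-time constants, each wrapper collapses
to a single specialized rearm loop with no runtime dispatch:

    /* sketch: per-driver wrapper around the common rearm helper */
    static __rte_always_inline void
    ice_rxq_rearm(struct ci_rx_queue *rxq)
    {
        /* desc_len and vec_level are literal constants here, so the
         * compiler drops the unused branches of the dispatch switch
         * in ci_rxq_rearm() entirely.
         */
        ci_rxq_rearm(rxq, sizeof(union ice_rx_flex_desc),
                CI_RX_VEC_LEVEL_AVX2);
    }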

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h               |   3 +
 drivers/net/intel/common/rx_vec_sse.h       | 323 ++++++++++++++++++++
 drivers/net/intel/ice/ice_rxtx.h            |   2 +-
 drivers/net/intel/ice/ice_rxtx_common_avx.h | 233 --------------
 drivers/net/intel/ice/ice_rxtx_vec_avx2.c   |   5 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx512.c |   5 +-
 drivers/net/intel/ice/ice_rxtx_vec_sse.c    |  77 +----
 7 files changed, 336 insertions(+), 312 deletions(-)
 create mode 100644 drivers/net/intel/common/rx_vec_sse.h
 delete mode 100644 drivers/net/intel/ice/ice_rxtx_common_avx.h

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index 507235f4c6..b084224e34 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -13,6 +13,8 @@
 #define CI_RX_BURST 32
 #define CI_RX_MAX_BURST 32
 #define CI_RX_MAX_NSEG 2
+#define CI_VPMD_DESCS_PER_LOOP 4
+#define CI_VPMD_RX_REARM_THRESH 64
 
 struct ci_rx_queue;
 
@@ -39,6 +41,7 @@ struct ci_rx_queue {
 		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
 		volatile union iavf_16byte_rx_desc *iavf_rx_16b_ring;
 		volatile union iavf_32byte_rx_desc *iavf_rx_32b_ring;
+		volatile void *rx_ring; /**< Generic */
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
diff --git a/drivers/net/intel/common/rx_vec_sse.h b/drivers/net/intel/common/rx_vec_sse.h
new file mode 100644
index 0000000000..6fe0baf38b
--- /dev/null
+++ b/drivers/net/intel/common/rx_vec_sse.h
@@ -0,0 +1,323 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 Intel Corporation
+ */
+
+#ifndef _COMMON_INTEL_RX_VEC_SSE_H_
+#define _COMMON_INTEL_RX_VEC_SSE_H_
+
+#include <stdint.h>
+
+#include <ethdev_driver.h>
+#include <rte_io.h>
+
+#include "rx.h"
+
+enum ci_rx_vec_level {
+	CI_RX_VEC_LEVEL_SSE = 0,
+	CI_RX_VEC_LEVEL_AVX2,
+	CI_RX_VEC_LEVEL_AVX512,
+};
+
+static inline int
+_ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	volatile void *rxdp;
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void **)rxp,
+				 rearm_thresh) < 0) {
+		if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) {
+				rxp[i].mbuf = &rxq->fake_mbuf;
+				const void *ptr = RTE_PTR_ADD(rxdp, i * desc_len);
+				_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr),
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += rearm_thresh;
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * SSE code path can handle both 16-byte and 32-byte descriptors with one code
+ * path, as we only ever write 16 bytes at a time.
+ */
+static __rte_always_inline void
+_ci_rxq_rearm_sse(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	const __m128i hdr_room = _mm_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+	const __m128i zero = _mm_setzero_si128();
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile void *rxdp;
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp = RTE_PTR_ADD(rxdp, 2 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len);
+		__m128i vaddr0, vaddr1;
+		__m128i dma_addr0, dma_addr1;
+		struct rte_mbuf *mb0, *mb1;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* add headroom to address values */
+		vaddr0 = _mm_add_epi64(vaddr0, hdr_room);
+		vaddr1 = _mm_add_epi64(vaddr1, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+		/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, zero);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, zero);
+#else
+		/* erase Header Buffer Address */
+		dma_addr0 = _mm_unpacklo_epi64(vaddr0, zero);
+		dma_addr1 = _mm_unpacklo_epi64(vaddr1, zero);
+#endif
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr0), dma_addr0);
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr1), dma_addr1);
+	}
+}
+
+#ifdef __AVX2__
+/* AVX2 version for 16-byte descriptors, handles 4 buffers at a time */
+static __rte_always_inline void
+_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	const size_t desc_len = 16;
+	volatile void *rxdp;
+	const __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+	const __m256i zero = _mm256_setzero_si256();
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 4, rxp += 4, rxdp = RTE_PTR_ADD(rxdp, 4 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 2);
+		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+		__m256i vaddr0_1, vaddr2_3;
+		__m256i dma_addr0_1, dma_addr2_3;
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+		mb2 = rxp[2].mbuf;
+		mb3 = rxp[3].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+		/**
+		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+		 * into the high lanes. Similarly for 2 & 3
+		 */
+		vaddr0_1 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+						vaddr1, 1);
+		vaddr2_3 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+						vaddr3, 1);
+
+		/* add headroom to address values */
+		vaddr0_1 = _mm256_add_epi64(vaddr0_1, hdr_room);
+		vaddr2_3 = _mm256_add_epi64(vaddr2_3, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, zero);
+		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, zero);
+#else
+		/* erase Header Buffer Address */
+		dma_addr0_1 = _mm256_unpacklo_epi64(vaddr0_1, zero);
+		dma_addr2_3 = _mm256_unpacklo_epi64(vaddr2_3, zero);
+#endif
+
+		/* flush desc with pa dma_addr */
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr0), dma_addr0_1);
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr1), dma_addr2_3);
+	}
+}
+#endif /* __AVX2__ */
+
+#ifdef __AVX512VL__
+/* AVX512 version for 16-byte descriptors, handles 8 buffers at a time */
+static __rte_always_inline void
+_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	const size_t desc_len = 16;
+	volatile void *rxdp;
+	int i;
+	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+	struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+	__m512i dma_addr0_3, dma_addr4_7;
+	__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+	__m512i zero = _mm512_setzero_si512();
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 8, rxp += 8, rxdp = RTE_PTR_ADD(rxdp, 8 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 4);
+		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+		__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+		__m256i vaddr0_1, vaddr2_3;
+		__m256i vaddr4_5, vaddr6_7;
+		__m512i vaddr0_3, vaddr4_7;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+		mb2 = rxp[2].mbuf;
+		mb3 = rxp[3].mbuf;
+		mb4 = rxp[4].mbuf;
+		mb5 = rxp[5].mbuf;
+		mb6 = rxp[6].mbuf;
+		mb7 = rxp[7].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+		vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+		vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+		vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+		vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+		/**
+		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+		 * into the high lanes. Similarly for 2 & 3, and so on.
+		 */
+		vaddr0_1 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+						vaddr1, 1);
+		vaddr2_3 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+						vaddr3, 1);
+		vaddr4_5 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+						vaddr5, 1);
+		vaddr6_7 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+						vaddr7, 1);
+		vaddr0_3 =
+			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+						vaddr2_3, 1);
+		vaddr4_7 =
+			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+						vaddr6_7, 1);
+
+		/* add headroom to address values */
+		vaddr0_3 = _mm512_add_epi64(vaddr0_3, hdr_room);
+		vaddr4_7 = _mm512_add_epi64(vaddr4_7, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+		dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, zero);
+		dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, zero);
+#else
+		/* erase Header Buffer Address */
+		dma_addr0_3 = _mm512_unpacklo_epi64(vaddr0_3, zero);
+		dma_addr4_7 = _mm512_unpacklo_epi64(vaddr4_7, zero);
+#endif
+
+		/* flush desc with pa dma_addr */
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr0), dma_addr0_3);
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr1), dma_addr4_7);
+	}
+}
+#endif /* __AVX512VL__ */
+
+static __rte_always_inline void
+ci_rxq_rearm(struct ci_rx_queue *rxq, const size_t desc_len,
+		const enum ci_rx_vec_level vec_level)
+{
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	uint16_t rx_id;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (_ci_rxq_rearm_get_bufs(rxq, desc_len) < 0)
+		return;
+
+	if (desc_len == 16) {
+		switch (vec_level) {
+		case CI_RX_VEC_LEVEL_AVX512:
+#ifdef __AVX512VL__
+			_ci_rxq_rearm_avx512(rxq);
+			break;
+#else
+			/* fall back to AVX2 if AVX512 is not available at compile time */
+			/* fall through */
+#endif
+		case CI_RX_VEC_LEVEL_AVX2:
+#ifdef __AVX2__
+			_ci_rxq_rearm_avx2(rxq);
+			break;
+#else
+			/* fall back to SSE if AVX2 isn't supported */
+			/* fall through */
+#endif
+		case CI_RX_VEC_LEVEL_SSE:
+			_ci_rxq_rearm_sse(rxq, desc_len);
+			break;
+		}
+	} else {
+		/* for 32-byte descriptors only support SSE */
+		_ci_rxq_rearm_sse(rxq, desc_len);
+	}
+
+	rxq->rxrearm_start += rearm_thresh;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= rearm_thresh;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif /* _COMMON_INTEL_RX_VEC_SSE_H_ */
diff --git a/drivers/net/intel/ice/ice_rxtx.h b/drivers/net/intel/ice/ice_rxtx.h
index 1a39770d7d..72d0972587 100644
--- a/drivers/net/intel/ice/ice_rxtx.h
+++ b/drivers/net/intel/ice/ice_rxtx.h
@@ -43,7 +43,7 @@
 
 #define ICE_VPMD_RX_BURST           32
 #define ICE_VPMD_TX_BURST           32
-#define ICE_RXQ_REARM_THRESH        64
+#define ICE_RXQ_REARM_THRESH        CI_VPMD_RX_REARM_THRESH
 #define ICE_MAX_RX_BURST            ICE_RXQ_REARM_THRESH
 #define ICE_TX_MAX_FREE_BUF_SZ      64
 #define ICE_DESCS_PER_LOOP          4
diff --git a/drivers/net/intel/ice/ice_rxtx_common_avx.h b/drivers/net/intel/ice/ice_rxtx_common_avx.h
deleted file mode 100644
index 7209c902db..0000000000
--- a/drivers/net/intel/ice/ice_rxtx_common_avx.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2019 Intel Corporation
- */
-
-#ifndef _ICE_RXTX_COMMON_AVX_H_
-#define _ICE_RXTX_COMMON_AVX_H_
-
-#include "ice_rxtx.h"
-
-#ifdef __AVX2__
-static __rte_always_inline void
-ice_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 ICE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-#if RTE_IOVA_IN_MBUF
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-#else
-		/* convert va to dma_addr hdr/data */
-		dma_addr0 = _mm_unpacklo_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpacklo_epi64(vaddr1, vaddr1);
-#endif
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-#else
-#ifdef __AVX512VL__
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < ICE_RXQ_REARM_THRESH;
-				i += 8, rxep += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-			mb4 = rxep[4].mbuf;
-			mb5 = rxep[5].mbuf;
-			mb6 = rxep[6].mbuf;
-			mb7 = rxep[7].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-						   vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-						   vaddr6_7, 1);
-
-#if RTE_IOVA_IN_MBUF
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-#else
-			/* convert va to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpacklo_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpacklo_epi64(vaddr4_7, vaddr4_7);
-#endif
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *, &rxdp->read), dma_addr0_3);
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *, &(rxdp + 4)->read), dma_addr4_7);
-		}
-	} else
-#endif /* __AVX512VL__ */
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < ICE_RXQ_REARM_THRESH;
-				i += 4, rxep += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-
-#if RTE_IOVA_IN_MBUF
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-#else
-			/* convert va to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpacklo_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpacklo_epi64(vaddr2_3, vaddr2_3);
-#endif
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *, &rxdp->read), dma_addr0_1);
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *, &(rxdp + 2)->read), dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif /* __AVX2__ */
-
-#endif /* _ICE_RXTX_COMMON_AVX_H_ */
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
index f4555369a2..5ca3f92482 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
@@ -3,14 +3,15 @@
  */
 
 #include "ice_rxtx_vec_common.h"
-#include "ice_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
 static __rte_always_inline void
 ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	ice_rxq_rearm_common(rxq, false);
+	ci_rxq_rearm(rxq, sizeof(union ice_rx_flex_desc), CI_RX_VEC_LEVEL_AVX2);
 }
 
 static __rte_always_inline __m256i
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
index 6eea74d703..883ea97c07 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
@@ -3,7 +3,8 @@
  */
 
 #include "ice_rxtx_vec_common.h"
-#include "ice_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
@@ -12,7 +13,7 @@
 static __rte_always_inline void
 ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	ice_rxq_rearm_common(rxq, true);
+	ci_rxq_rearm(rxq, sizeof(union ice_rx_flex_desc), CI_RX_VEC_LEVEL_AVX512);
 }
 
 static inline __m256i
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_sse.c b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
index dc9d37226a..fa0c7e8829 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
@@ -4,6 +4,8 @@
 
 #include "ice_rxtx_vec_common.h"
 
+#include "../common/rx_vec_sse.h"
+
 #include <rte_vect.h>
 
 static inline __m128i
@@ -28,80 +30,7 @@ ice_flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 static inline void
 ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-					  RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 ICE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				 offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-#if RTE_IOVA_IN_MBUF
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-#else
-		/* convert va to dma_addr hdr/data */
-		dma_addr0 = _mm_unpacklo_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpacklo_epi64(vaddr1, vaddr1);
-#endif
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union ice_rx_flex_desc), CI_RX_VEC_LEVEL_SSE);
 }
 
 static inline void
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v1 08/13] net/i40e: use common Rx rearm code
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (5 preceding siblings ...)
  2025-05-06 13:27 ` [PATCH v1 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 09/13] net/iavf: " Anatoly Burakov
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes

The i40e driver has an implementation of vectorized mbuf rearm code that
is identical to the one in the common code, so just use that.

In addition, the i40e driver has an implementation of Rx queue rearm for the
Neon instruction set, so create a common header for Neon implementations too,
and use it in the i40e Neon code.
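
As a usage sketch (assuming the common Neon header added here), the i40e
Neon rearm wrapper is expected to reduce to a one-line call into the common
helper, with the descriptor size passed as a compile-time constant:

    /* sketch: i40e_rxq_rearm() in i40e_rxtx_vec_neon.c after conversion */
    static inline void
    i40e_rxq_rearm(struct ci_rx_queue *rxq)
    {
        ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc));
    }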

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx_vec_neon.h        | 131 +++++++++++
 drivers/net/intel/i40e/i40e_rxtx.h            |   2 +-
 drivers/net/intel/i40e/i40e_rxtx_common_avx.h | 215 ------------------
 drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |   5 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |   5 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   |  59 +----
 drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    |  70 +-----
 7 files changed, 144 insertions(+), 343 deletions(-)
 create mode 100644 drivers/net/intel/common/rx_vec_neon.h
 delete mode 100644 drivers/net/intel/i40e/i40e_rxtx_common_avx.h

diff --git a/drivers/net/intel/common/rx_vec_neon.h b/drivers/net/intel/common/rx_vec_neon.h
new file mode 100644
index 0000000000..35379ab563
--- /dev/null
+++ b/drivers/net/intel/common/rx_vec_neon.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 Intel Corporation
+ */
+
+#ifndef _COMMON_INTEL_RX_VEC_NEON_H_
+#define _COMMON_INTEL_RX_VEC_NEON_H_
+
+#include <stdint.h>
+
+#include <ethdev_driver.h>
+#include <rte_io.h>
+#include <rte_vect.h>
+
+#include "rx.h"
+
+static inline int
+_ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	volatile void *rxdp;
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void **)rxp,
+				 rearm_thresh) < 0) {
+		if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) {
+			uint64x2_t zero = vdupq_n_u64(0);
+
+			for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) {
+				rxp[i].mbuf = &rxq->fake_mbuf;
+				const void *ptr = RTE_PTR_ADD(rxdp, i * desc_len);
+				vst1q_u64(RTE_CAST_PTR(uint64_t *, ptr), zero);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += rearm_thresh;
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * The Neon code path can handle both 16-byte and 32-byte descriptors with one
+ * code path, as we only ever write 16 bytes at a time.
+ */
+static __rte_always_inline void
+_ci_rxq_rearm_neon(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	const uint64x2_t zero = vdupq_n_u64(0);
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile void *rxdp;
+	int i;
+
+	const uint8x8_t mbuf_init = vld1_u8((uint8_t *)&rxq->mbuf_initializer);
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp = RTE_PTR_ADD(rxdp, 2 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len);
+		uint64_t addr0, addr1;
+		uint64x2_t dma_addr0, dma_addr1;
+		struct rte_mbuf *mb0, *mb1;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/*
+		 * Flush mbuf with pkt template.
+		 * Data to be rearmed is 6 bytes long.
+		 */
+		vst1_u8((uint8_t *)&mb0->rearm_data, mbuf_init);
+		addr0 = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
+		dma_addr0 = vsetq_lane_u64(addr0, zero, 0);
+		/* flush desc with pa dma_addr */
+		vst1q_u64(RTE_CAST_PTR(volatile uint64_t *, ptr0), dma_addr0);
+
+		vst1_u8((uint8_t *)&mb1->rearm_data, mbuf_init);
+		addr1 = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
+		dma_addr1 = vsetq_lane_u64(addr1, zero, 0);
+		vst1q_u64(RTE_CAST_PTR(volatile uint64_t *, ptr1), dma_addr1);
+#else
+		/*
+		 * Flush mbuf with pkt template.
+		 * Data to be rearmed is 6 bytes long.
+		 */
+		vst1_u8((uint8_t *)&mb0->rearm_data, mbuf_init);
+		addr0 = (uintptr_t)RTE_PTR_ADD(mb0->buf_addr, RTE_PKTMBUF_HEADROOM);
+		dma_addr0 = vsetq_lane_u64(addr0, zero, 0);
+		/* flush desc with pa dma_addr */
+		vst1q_u64(RTE_CAST_PTR(volatile uint64_t *, ptr0), dma_addr0);
+
+		vst1_u8((uint8_t *)&mb1->rearm_data, mbuf_init);
+		addr1 = (uintptr_t)RTE_PTR_ADD(mb1->buf_addr, RTE_PKTMBUF_HEADROOM);
+		dma_addr1 = vsetq_lane_u64(addr1, zero, 0);
+		vst1q_u64(RTE_CAST_PTR(volatile uint64_t *, ptr1), dma_addr1);
+#endif
+	}
+}
+
+static __rte_always_inline void
+ci_rxq_rearm(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	uint16_t rx_id;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (_ci_rxq_rearm_get_bufs(rxq, desc_len) < 0)
+		return;
+
+	_ci_rxq_rearm_neon(rxq, desc_len);
+
+	rxq->rxrearm_start += rearm_thresh;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= rearm_thresh;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif /* _COMMON_INTEL_RX_VEC_NEON_H_ */
diff --git a/drivers/net/intel/i40e/i40e_rxtx.h b/drivers/net/intel/i40e/i40e_rxtx.h
index 4b5a84d8ef..8a41db2df3 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx.h
@@ -13,7 +13,7 @@
 
 #define RTE_I40E_VPMD_RX_BURST        32
 #define RTE_I40E_VPMD_TX_BURST        32
-#define RTE_I40E_RXQ_REARM_THRESH      32
+#define RTE_I40E_RXQ_REARM_THRESH      CI_VPMD_RX_REARM_THRESH
 #define RTE_I40E_MAX_RX_BURST          RTE_I40E_RXQ_REARM_THRESH
 #define RTE_I40E_TX_MAX_FREE_BUF_SZ    64
 #define RTE_I40E_DESCS_PER_LOOP    4
diff --git a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
deleted file mode 100644
index fd9447014b..0000000000
--- a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
+++ /dev/null
@@ -1,215 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2015 Intel Corporation
- */
-
-#ifndef _I40E_RXTX_COMMON_AVX_H_
-#define _I40E_RXTX_COMMON_AVX_H_
-#include <stdint.h>
-#include <ethdev_driver.h>
-#include <rte_malloc.h>
-
-#include "i40e_ethdev.h"
-#include "i40e_rxtx.h"
-
-#ifdef __AVX2__
-static __rte_always_inline void
-i40e_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-#else
-#ifdef __AVX512VL__
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
-				i += 8, rxep += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-			mb4 = rxep[4].mbuf;
-			mb5 = rxep[5].mbuf;
-			mb6 = rxep[6].mbuf;
-			mb7 = rxep[7].mbuf;
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-						   vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-						   vaddr6_7, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *,
-					&rxdp->read), dma_addr0_3);
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *,
-					&(rxdp + 4)->read), dma_addr4_7);
-		}
-	} else
-#endif /* __AVX512VL__*/
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
-				i += 4, rxep += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 = _mm256_inserti128_si256
-				(_mm256_castsi128_si256(vaddr0), vaddr1, 1);
-			vaddr2_3 = _mm256_inserti128_si256
-				(_mm256_castsi128_si256(vaddr2), vaddr3, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *,
-					&rxdp->read), dma_addr0_1);
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *,
-					&(rxdp + 2)->read), dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif /* __AVX2__*/
-
-#endif /*_I40E_RXTX_COMMON_AVX_H_*/
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index 0f3f7430aa..260b7d700a 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -11,14 +11,15 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
-#include "i40e_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
 static __rte_always_inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	i40e_rxq_rearm_common(rxq, false);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc), CI_RX_VEC_LEVEL_AVX2);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index f2292b45e8..be004e9f4f 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -11,7 +11,8 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
-#include "i40e_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
@@ -20,7 +21,7 @@
 static __rte_always_inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	i40e_rxq_rearm_common(rxq, true);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc), CI_RX_VEC_LEVEL_AVX512);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index 814aa666dc..6c21546471 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -16,65 +16,12 @@
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
 
+#include "../common/rx_vec_neon.h"
+
 static inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	uint64x2_t dma_addr0, dma_addr1;
-	uint64x2_t zero = vdupq_n_u64(0);
-	uint64_t paddr;
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  RTE_I40E_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read), zero);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr0 = vdupq_n_u64(paddr);
-
-		/* flush desc with pa dma_addr */
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr0);
-
-		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr1 = vdupq_n_u64(paddr);
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
-	rte_io_wmb();
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc));
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 74cd59e245..432177d499 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -12,78 +12,14 @@
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
 
+#include "../common/rx_vec_sse.h"
+
 #include <rte_vect.h>
 
 static inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc), CI_RX_VEC_LEVEL_SSE);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v1 09/13] net/iavf: use common Rx rearm code
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (6 preceding siblings ...)
  2025-05-06 13:27 ` [PATCH v1 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 10/13] net/ixgbe: " Anatoly Burakov
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin, Ian Stokes; +Cc: bruce.richardson

The iavf driver has implementations of vectorized mbuf rearm code that
are identical to the ones in the common code, so just use those.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/iavf/iavf_rxtx.h            |   4 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   |   3 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c |   3 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_common.h | 199 ------------------
 drivers/net/intel/iavf/iavf_rxtx_vec_neon.c   |  58 +----
 drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    |  72 +------
 6 files changed, 11 insertions(+), 328 deletions(-)

diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index c43ddc3c2f..d70250bf85 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -25,8 +25,8 @@
 /* used for Vector PMD */
 #define IAVF_VPMD_RX_MAX_BURST    32
 #define IAVF_VPMD_TX_MAX_BURST    32
-#define IAVF_RXQ_REARM_THRESH     32
-#define IAVF_VPMD_DESCS_PER_LOOP  4
+#define IAVF_RXQ_REARM_THRESH     CI_VPMD_RX_REARM_THRESH
+#define IAVF_VPMD_DESCS_PER_LOOP  CI_VPMD_DESCS_PER_LOOP
 #define IAVF_VPMD_TX_MAX_FREE_BUF 64
 
 #define IAVF_TX_NO_VECTOR_FLAGS (				 \
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
index f51fa4acf9..496c7abc42 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
@@ -2,6 +2,7 @@
  * Copyright(c) 2019 Intel Corporation
  */
 
+#include "../common/rx_vec_sse.h"
 #include "iavf_rxtx_vec_common.h"
 
 #include <rte_vect.h>
@@ -9,7 +10,7 @@
 static __rte_always_inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	iavf_rxq_rearm_common(rxq, false);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc), false);
 }
 
 #define PKTLEN_SHIFT     10
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
index 80495f33cd..e7cd2b7c89 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
@@ -2,6 +2,7 @@
  * Copyright(c) 2020 Intel Corporation
  */
 
+#include "../common/rx_vec_sse.h"
 #include "iavf_rxtx_vec_common.h"
 
 #include <rte_vect.h>
@@ -30,7 +31,7 @@
 static __rte_always_inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	iavf_rxq_rearm_common(rxq, true);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc), true);
 }
 
 #define IAVF_RX_LEN_MASK 0x80808080
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
index f0a7d19b6a..50228eb112 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
@@ -237,203 +237,4 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 	*txd_hi |= ((uint64_t)td_cmd) << IAVF_TXD_QW1_CMD_SHIFT;
 }
 
-#ifdef RTE_ARCH_X86
-static __rte_always_inline void
-iavf_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxp,
-				 IAVF_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IAVF_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-#else
-#ifdef CC_AVX512_SUPPORT
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
-				i += 8, rxp += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxp[0];
-			mb1 = rxp[1];
-			mb2 = rxp[2];
-			mb3 = rxp[3];
-			mb4 = rxp[4];
-			mb5 = rxp[5];
-			mb6 = rxp[6];
-			mb7 = rxp[7];
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-							vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-							vaddr6_7, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
-			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
-		}
-	} else
-#endif
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
-				i += 4, rxp += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxp[0];
-			mb1 = rxp[1];
-			mb2 = rxp[2];
-			mb3 = rxp[3];
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif
-
 #endif
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
index e1c8f3c7f9..490028c68a 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
@@ -14,64 +14,12 @@
 #include "iavf_rxtx.h"
 #include "iavf_rxtx_vec_common.h"
 
+#include "../common/rx_vec_neon.h"
+
 static inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	uint64x2_t dma_addr0, dma_addr1;
-	uint64x2_t zero = vdupq_n_u64(0);
-	uint64_t paddr;
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  IAVF_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxep[i] = &rxq->fake_mbuf;
-				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read), zero);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IAVF_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0];
-		mb1 = rxep[1];
-
-		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr0 = vdupq_n_u64(paddr);
-
-		/* flush desc with pa dma_addr */
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr0);
-
-		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr1 = vdupq_n_u64(paddr);
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	rte_io_wmb();
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc));
 }
 
 static inline void
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
index f18dfd636c..3f0ca6cf8e 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
@@ -9,82 +9,14 @@
 #include "iavf.h"
 #include "iavf_rxtx.h"
 #include "iavf_rxtx_vec_common.h"
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
 static inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-
-	volatile union iavf_rx_desc *rxdp;
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp, (void *)rxp,
-				 rxq->rx_free_thresh) < 0) {
-		if (rxq->rxrearm_nb + rxq->rx_free_thresh >= rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			rxq->rx_free_thresh;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < rxq->rx_free_thresh; i += 2, rxp += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += rxq->rx_free_thresh;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= rxq->rx_free_thresh;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-		   "rearm_start=%u rearm_nb=%u",
-		   rxq->port_id, rxq->queue_id,
-		   rx_id, rxq->rxrearm_start, rxq->rxrearm_nb);
-
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc), false);
 }
 
 static inline void
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v1 10/13] net/ixgbe: use common Rx rearm code
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (7 preceding siblings ...)
  2025-05-06 13:27 ` [PATCH v1 09/13] net/iavf: " Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:28 ` [PATCH v1 11/13] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin; +Cc: bruce.richardson

The ixgbe driver has implementations of vectorized mbuf rearm code that
are identical to the ones in the common code, so just use those.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/ixgbe/ixgbe_rxtx.h          |  2 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c | 66 +---------------
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c  | 75 +------------------
 3 files changed, 7 insertions(+), 136 deletions(-)

diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index 84e28eb254..f3dd32b9ff 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -37,7 +37,7 @@
 #define RTE_IXGBE_DESCS_PER_LOOP    4
 
 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
-#define RTE_IXGBE_RXQ_REARM_THRESH      32
+#define RTE_IXGBE_RXQ_REARM_THRESH      CI_VPMD_RX_REARM_THRESH
 #define RTE_IXGBE_MAX_RX_BURST          RTE_IXGBE_RXQ_REARM_THRESH
 #endif
 
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
index 630a2e6a1d..0842f213ef 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -11,72 +11,12 @@
 #include "ixgbe_rxtx.h"
 #include "ixgbe_rxtx_vec_common.h"
 
+#include "../common/rx_vec_neon.h"
+
 static inline void
 ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	uint64x2_t dma_addr0, dma_addr1;
-	uint64x2_t zero = vdupq_n_u64(0);
-	uint64_t paddr;
-	uint8x8_t p;
-
-	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read),
-					  zero);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_IXGBE_RXQ_REARM_THRESH;
-		return;
-	}
-
-	p = vld1_u8((uint8_t *)&rxq->mbuf_initializer);
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/*
-		 * Flush mbuf with pkt template.
-		 * Data to be rearmed is 6 bytes long.
-		 */
-		vst1_u8((uint8_t *)&mb0->rearm_data, p);
-		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr0 = vsetq_lane_u64(paddr, zero, 0);
-		/* flush desc with pa dma_addr */
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr0);
-
-		vst1_u8((uint8_t *)&mb1->rearm_data, p);
-		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr1 = vsetq_lane_u64(paddr, zero, 0);
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union ixgbe_adv_rx_desc));
 }
 
 static inline void
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
index ecfb0d6ba6..c6e90b8d41 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -10,83 +10,14 @@
 #include "ixgbe_rxtx.h"
 #include "ixgbe_rxtx_vec_common.h"
 
+#include "../common/rx_vec_sse.h"
+
 #include <rte_vect.h>
 
 static inline void
 ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	const __m128i hba_msk = _mm_set_epi64x(0, UINT64_MAX);
-
-	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 RTE_IXGBE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_IXGBE_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr));
-		vaddr1 = _mm_loadu_si128((__m128i *)&(mb1->buf_addr));
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* set Header Buffer Address to zero */
-		dma_addr0 =  _mm_and_si128(dma_addr0, hba_msk);
-		dma_addr1 =  _mm_and_si128(dma_addr1, hba_msk);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t) ((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union ixgbe_adv_rx_desc), CI_RX_VEC_LEVEL_SSE);
 }
 
 #ifdef RTE_LIB_SECURITY
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v1 11/13] net/intel: support wider x86 vectors for Rx rearm
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (8 preceding siblings ...)
  2025-05-06 13:27 ` [PATCH v1 10/13] net/ixgbe: " Anatoly Burakov
@ 2025-05-06 13:28 ` Anatoly Burakov
  2025-05-06 13:28 ` [PATCH v1 12/13] net/intel: add common Rx mbuf recycle Anatoly Burakov
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:28 UTC (permalink / raw)
  To: dev, Bruce Richardson

Currently, for the 32-byte descriptor format, only the SSE instruction set
is supported. Add implementations for the AVX2 and AVX512 instruction sets.
These implementations similarly constant-propagate everything at compile
time and thus should not affect the performance of existing code paths. To
improve code readability and reduce code duplication when supporting
different descriptor sizes, the implementation is also refactored.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx_vec_sse.h | 380 ++++++++++++++------------
 1 file changed, 205 insertions(+), 175 deletions(-)
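
As a rough illustration of the constant-propagation point in the commit
message (the names below are illustrative only and not part of the patch):
because the common helper is always-inline and every call site passes a
compile-time-constant descriptor size, values derived from desc_len and the
branches on them are resolved by the compiler rather than at run time.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* stand-in for the always-inline common helper: desc_len is a compile-time
 * constant at every call site, so desc_per_iter and the branch on it fold
 * away during compilation */
static inline uint8_t
rearm_step(const size_t desc_len)
{
	const uint8_t desc_per_reg = 32 / desc_len;	/* 256-bit register */
	const uint8_t desc_per_iter = desc_per_reg * 2;

	if (desc_per_iter == 2) {
		/* 32-byte descriptors: one address plus 16 zero bytes per register */
	} else {
		/* 16-byte descriptors: two addresses packed per register */
	}
	return desc_per_iter;
}

int
main(void)
{
	/* each call folds to a constant: 4 for 16-byte, 2 for 32-byte descriptors */
	printf("16-byte descriptors: %u per iteration\n", (unsigned int)rearm_step(16));
	printf("32-byte descriptors: %u per iteration\n", (unsigned int)rearm_step(32));
	return 0;
}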

diff --git a/drivers/net/intel/common/rx_vec_sse.h b/drivers/net/intel/common/rx_vec_sse.h
index 6fe0baf38b..0aeaac3dc9 100644
--- a/drivers/net/intel/common/rx_vec_sse.h
+++ b/drivers/net/intel/common/rx_vec_sse.h
@@ -48,223 +48,258 @@ _ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq, const size_t desc_len)
 	return 0;
 }
 
-/*
- * SSE code path can handle both 16-byte and 32-byte descriptors with one code
- * path, as we only ever write 16 bytes at a time.
- */
-static __rte_always_inline void
-_ci_rxq_rearm_sse(struct ci_rx_queue *rxq, const size_t desc_len)
+static __rte_always_inline __m128i
+_ci_rxq_rearm_desc_sse(const __m128i vaddr)
 {
 	const __m128i hdr_room = _mm_set1_epi64x(RTE_PKTMBUF_HEADROOM);
 	const __m128i zero = _mm_setzero_si128();
+	__m128i reg;
+
+	/* add headroom to address values */
+	reg = _mm_add_epi64(vaddr, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+	/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+			 offsetof(struct rte_mbuf, buf_addr) + 8);
+	/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
+	reg = _mm_unpackhi_epi64(reg, zero);
+#else
+	/* erase Header Buffer Address */
+	reg = _mm_unpacklo_epi64(reg, zero);
+#endif
+	return reg;
+}
+
+static __rte_always_inline void
+_ci_rxq_rearm_sse(struct ci_rx_queue *rxq, const size_t desc_len)
+{
 	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
 	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint8_t desc_per_reg = 1;
+	const uint8_t desc_per_iter = desc_per_reg * 2;
 	volatile void *rxdp;
 	int i;
 
 	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp = RTE_PTR_ADD(rxdp, 2 * desc_len)) {
+	for (i = 0; i < rearm_thresh;
+			i += desc_per_iter,
+			rxp += desc_per_iter,
+			rxdp = RTE_PTR_ADD(rxdp, desc_per_iter * desc_len)) {
 		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
-		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len);
-		__m128i vaddr0, vaddr1;
-		__m128i dma_addr0, dma_addr1;
-		struct rte_mbuf *mb0, *mb1;
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * desc_per_reg);
+		const struct rte_mbuf *mb0 = rxp[0].mbuf;
+		const struct rte_mbuf *mb1 = rxp[1].mbuf;
 
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
+		const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+		const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
 
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* add headroom to address values */
-		vaddr0 = _mm_add_epi64(vaddr0, hdr_room);
-		vaddr1 = _mm_add_epi64(vaddr1, hdr_room);
-
-#if RTE_IOVA_IN_MBUF
-		/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, zero);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, zero);
-#else
-		/* erase Header Buffer Address */
-		dma_addr0 = _mm_unpacklo_epi64(vaddr0, zero);
-		dma_addr1 = _mm_unpacklo_epi64(vaddr1, zero);
-#endif
+		const __m128i reg0 = _ci_rxq_rearm_desc_sse(vaddr0);
+		const __m128i reg1 = _ci_rxq_rearm_desc_sse(vaddr1);
 
 		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr0), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr1), dma_addr1);
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr0), reg0);
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr1), reg1);
 	}
 }
 
 #ifdef __AVX2__
-/* AVX2 version for 16-byte descriptors, handles 4 buffers at a time */
-static __rte_always_inline void
-_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq)
+static __rte_always_inline __m256i
+_ci_rxq_rearm_desc_avx2(const __m128i vaddr0, const __m128i vaddr1)
 {
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
-	const size_t desc_len = 16;
-	volatile void *rxdp;
 	const __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
 	const __m256i zero = _mm256_setzero_si256();
+	__m256i reg;
+
+	/* merge by casting 0 to 256-bit and inserting 1 into the high lanes */
+	reg =
+		_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+					vaddr1, 1);
+
+	/* add headroom to address values */
+	reg = _mm256_add_epi64(reg, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+	/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+			offsetof(struct rte_mbuf, buf_addr) + 8);
+	/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+	reg = _mm256_unpackhi_epi64(reg, zero);
+#else
+	/* erase Header Buffer Address */
+	reg = _mm256_unpacklo_epi64(reg, zero);
+#endif
+	return reg;
+}
+
+static __rte_always_inline void
+_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	/* how many descriptors can fit into a register */
+	const uint8_t desc_per_reg = sizeof(__m256i) / desc_len;
+	/* how many descriptors can fit into one loop iteration */
+	const uint8_t desc_per_iter = desc_per_reg * 2;
+	volatile void *rxdp;
 	int i;
 
 	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
 
-	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < rearm_thresh; i += 4, rxp += 4, rxdp = RTE_PTR_ADD(rxdp, 4 * desc_len)) {
+	/* Initialize the mbufs in vector, process 2 or 4 mbufs in one loop */
+	for (i = 0; i < rearm_thresh;
+			i += desc_per_iter,
+			rxp += desc_per_iter,
+			rxdp = RTE_PTR_ADD(rxdp, desc_per_iter * desc_len)) {
 		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
-		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 2);
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m256i vaddr0_1, vaddr2_3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * desc_per_reg);
+		__m256i reg0, reg1;
 
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-		mb2 = rxp[2].mbuf;
-		mb3 = rxp[3].mbuf;
+		if (desc_per_iter == 2) {
+			/* 16 byte descriptor, 16 byte zero, times two */
+			const __m128i zero = _mm_setzero_si128();
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
 
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
 
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3
-		 */
-		vaddr0_1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-						vaddr1, 1);
-		vaddr2_3 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-						vaddr3, 1);
+			reg0 = _ci_rxq_rearm_desc_avx2(vaddr0, zero);
+			reg1 = _ci_rxq_rearm_desc_avx2(vaddr1, zero);
+		} else {
+			/* 16 byte descriptor times four */
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
+			const struct rte_mbuf *mb2 = rxp[2].mbuf;
+			const struct rte_mbuf *mb3 = rxp[3].mbuf;
 
-		/* add headroom to address values */
-		vaddr0_1 = _mm256_add_epi64(vaddr0_1, hdr_room);
-		vaddr0_1 = _mm256_add_epi64(vaddr0_1, hdr_room);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+			const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+			const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
 
-#if RTE_IOVA_IN_MBUF
-		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
-		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, zero);
-		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, zero);
-#else
-		/* erase Header Buffer Address */
-		dma_addr0_1 = _mm256_unpacklo_epi64(vaddr0_1, zero);
-		dma_addr2_3 = _mm256_unpacklo_epi64(vaddr2_3, zero);
-#endif
+			reg0 = _ci_rxq_rearm_desc_avx2(vaddr0, vaddr1);
+			reg1 = _ci_rxq_rearm_desc_avx2(vaddr2, vaddr3);
+		}
 
 		/* flush desc with pa dma_addr */
-		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr0), dma_addr0_1);
-		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr1), dma_addr2_3);
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr0), reg0);
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr1), reg1);
 	}
 }
 #endif /* __AVX2__ */
 
 #ifdef __AVX512VL__
-/* AVX512 version for 16-byte descriptors, handles 8 buffers at a time */
+static __rte_always_inline __m512i
+_ci_rxq_rearm_desc_avx512(const __m128i vaddr0, const __m128i vaddr1,
+		const __m128i vaddr2, const __m128i vaddr3)
+{
+	const __m512i zero = _mm512_setzero_si512();
+	const __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+	__m256i vaddr0_1, vaddr2_3;
+	__m512i reg;
+
+	/**
+	 * merge 0 & 1, by casting 0 to 256-bit and inserting 1 into the high
+	 * lanes. Similarly for 2 & 3.
+	 */
+	vaddr0_1 =
+		_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+					vaddr1, 1);
+	vaddr2_3 =
+		_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+					vaddr3, 1);
+	/*
+	 * merge 0+1 & 2+3, by casting 0+1 to 512-bit and inserting 2+3 into the
+	 * high lanes.
+	 */
+	reg =
+		_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+					vaddr2_3, 1);
+
+	/* add headroom to address values */
+	reg = _mm512_add_epi64(reg, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+	/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+			offsetof(struct rte_mbuf, buf_addr) + 8);
+	/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+	reg = _mm512_unpackhi_epi64(reg, zero);
+#else
+	/* erase Header Buffer Address */
+	reg = _mm512_unpacklo_epi64(reg, zero);
+#endif
+	return reg;
+}
+
 static __rte_always_inline void
-_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq)
+_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq, const size_t desc_len)
 {
 	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
 	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
-	const size_t desc_len = 16;
+	/* how many descriptors can fit into a register */
+	const uint8_t desc_per_reg = sizeof(__m512i) / desc_len;
+	/* how many descriptors can fit into one loop iteration */
+	const uint8_t desc_per_iter = desc_per_reg * 2;
 	volatile void *rxdp;
 	int i;
-	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-	struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-	__m512i dma_addr0_3, dma_addr4_7;
-	__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-	__m512i zero = _mm512_setzero_si512();
 
 	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
 
-	/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-	for (i = 0; i < rearm_thresh; i += 8, rxp += 8, rxdp = RTE_PTR_ADD(rxdp, 8 * desc_len)) {
+	/* Initialize the mbufs in vector, process 4 or 8 mbufs in one loop */
+	for (i = 0; i < rearm_thresh;
+			i += desc_per_iter,
+			rxp += desc_per_iter,
+			rxdp = RTE_PTR_ADD(rxdp, desc_per_iter * desc_len)) {
 		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
-		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 4);
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-		__m256i vaddr0_1, vaddr2_3;
-		__m256i vaddr4_5, vaddr6_7;
-		__m512i vaddr0_3, vaddr4_7;
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * desc_per_reg);
+		__m512i reg0, reg1;
 
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-		mb2 = rxp[2].mbuf;
-		mb3 = rxp[3].mbuf;
-		mb4 = rxp[4].mbuf;
-		mb5 = rxp[5].mbuf;
-		mb6 = rxp[6].mbuf;
-		mb7 = rxp[7].mbuf;
+		if (desc_per_iter == 4) {
+			/* 16-byte descriptor, 16 byte zero, times four */
+			const __m128i zero = _mm_setzero_si128();
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
+			const struct rte_mbuf *mb2 = rxp[2].mbuf;
+			const struct rte_mbuf *mb3 = rxp[3].mbuf;
 
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-		vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-		vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-		vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-		vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+			const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+			const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
 
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3, and so on.
-		 */
-		vaddr0_1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-						vaddr1, 1);
-		vaddr2_3 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-						vaddr3, 1);
-		vaddr4_5 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-						vaddr5, 1);
-		vaddr6_7 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-						vaddr7, 1);
-		vaddr0_3 =
-			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-						vaddr2_3, 1);
-		vaddr4_7 =
-			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-						vaddr6_7, 1);
+			reg0 = _ci_rxq_rearm_desc_avx512(vaddr0, zero, vaddr1, zero);
+			reg1 = _ci_rxq_rearm_desc_avx512(vaddr2, zero, vaddr3, zero);
+		} else {
+			/* 16-byte descriptor times eight */
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
+			const struct rte_mbuf *mb2 = rxp[2].mbuf;
+			const struct rte_mbuf *mb3 = rxp[3].mbuf;
+			const struct rte_mbuf *mb4 = rxp[4].mbuf;
+			const struct rte_mbuf *mb5 = rxp[5].mbuf;
+			const struct rte_mbuf *mb6 = rxp[6].mbuf;
+			const struct rte_mbuf *mb7 = rxp[7].mbuf;
 
-		/* add headroom to address values */
-		vaddr0_3 = _mm512_add_epi64(vaddr0_3, hdr_room);
-		dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+			const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+			const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
+			const __m128i vaddr4 = _mm_loadu_si128((const __m128i *)&mb4->buf_addr);
+			const __m128i vaddr5 = _mm_loadu_si128((const __m128i *)&mb5->buf_addr);
+			const __m128i vaddr6 = _mm_loadu_si128((const __m128i *)&mb6->buf_addr);
+			const __m128i vaddr7 = _mm_loadu_si128((const __m128i *)&mb7->buf_addr);
 
-#if RTE_IOVA_IN_MBUF
-		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
-		dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, zero);
-		dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, zero);
-#else
-		/* erase Header Buffer Address */
-		dma_addr0_3 = _mm512_unpacklo_epi64(vaddr0_3, zero);
-		dma_addr4_7 = _mm512_unpacklo_epi64(vaddr4_7, zero);
-#endif
+			reg0 = _ci_rxq_rearm_desc_avx512(vaddr0, vaddr1, vaddr2, vaddr3);
+			reg1 = _ci_rxq_rearm_desc_avx512(vaddr4, vaddr5, vaddr6, vaddr7);
+		}
 
 		/* flush desc with pa dma_addr */
-		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr0), dma_addr0_3);
-		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr1), dma_addr4_7);
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr0), reg0);
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr1), reg1);
 	}
 }
 #endif /* __AVX512VL__ */
@@ -280,31 +315,26 @@ ci_rxq_rearm(struct ci_rx_queue *rxq, const size_t desc_len,
 	if (_ci_rxq_rearm_get_bufs(rxq, desc_len) < 0)
 		return;
 
-	if (desc_len == 16) {
-		switch (vec_level) {
-		case CI_RX_VEC_LEVEL_AVX512:
+	switch (vec_level) {
+	case CI_RX_VEC_LEVEL_AVX512:
 #ifdef __AVX512VL__
-			_ci_rxq_rearm_avx512(rxq);
-			break;
+		_ci_rxq_rearm_avx512(rxq, desc_len);
+		break;
 #else
-			/* fall back to AVX2 unless requested not to */
-			/* fall through */
+		/* fall back to AVX2 unless requested not to */
+		/* fall through */
 #endif
-		case CI_RX_VEC_LEVEL_AVX2:
+	case CI_RX_VEC_LEVEL_AVX2:
 #ifdef __AVX2__
-			_ci_rxq_rearm_avx2(rxq);
+			_ci_rxq_rearm_avx2(rxq, desc_len);
 			break;
 #else
 			/* fall back to SSE if AVX2 isn't supported */
 			/* fall through */
 #endif
-		case CI_RX_VEC_LEVEL_SSE:
-			_ci_rxq_rearm_sse(rxq, desc_len);
-			break;
-		}
-	} else {
-		/* for 32-byte descriptors only support SSE */
+	case CI_RX_VEC_LEVEL_SSE:
 		_ci_rxq_rearm_sse(rxq, desc_len);
+		break;
 	}
 
 	rxq->rxrearm_start += rearm_thresh;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v1 12/13] net/intel: add common Rx mbuf recycle
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (9 preceding siblings ...)
  2025-05-06 13:28 ` [PATCH v1 11/13] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
@ 2025-05-06 13:28 ` Anatoly Burakov
  2025-05-06 13:28 ` [PATCH v1 13/13] net/intel: add common Tx " Anatoly Burakov
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:28 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes, Vladimir Medvedkin

Currently, there are duplicate implementations of Rx mbuf recycle in some
drivers, specifically ixgbe and i40e. Move them into a common header.

While we're at it, also support the no-IOVA-in-mbuf case.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/recycle_mbufs.h      | 67 +++++++++++++++++++
 .../i40e/i40e_recycle_mbufs_vec_common.c      | 37 +---------
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    | 35 +---------
 3 files changed, 73 insertions(+), 66 deletions(-)
 create mode 100644 drivers/net/intel/common/recycle_mbufs.h
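
To illustrate what the no-IOVA-in-mbuf support amounts to (a minimal
sketch; the helper name below is illustrative, not part of the patch): when
RTE_IOVA_IN_MBUF is 0 the mbuf carries no buf_iova field, so the buffer's
virtual address plus headroom is written to the descriptor instead,
mirroring the #if in the new common helper.

#include <stdint.h>

#include <rte_byteorder.h>
#include <rte_mbuf.h>

/* illustrative helper mirroring the RTE_IOVA_IN_MBUF handling in the new
 * common ci_rx_recycle_mbufs() */
static inline uint64_t
rearm_dma_addr(const struct rte_mbuf *mb)
{
#if RTE_IOVA_IN_MBUF
	/* hardware is given the buffer IOVA plus headroom */
	return rte_cpu_to_le_64(mb->buf_iova + RTE_PKTMBUF_HEADROOM);
#else
	/* no IOVA field in the mbuf: the virtual address acts as the IOVA */
	return rte_cpu_to_le_64((uintptr_t)mb->buf_addr + RTE_PKTMBUF_HEADROOM);
#endif
}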

diff --git a/drivers/net/intel/common/recycle_mbufs.h b/drivers/net/intel/common/recycle_mbufs.h
new file mode 100644
index 0000000000..fd31c5c1ff
--- /dev/null
+++ b/drivers/net/intel/common/recycle_mbufs.h
@@ -0,0 +1,67 @@
+#ifndef _COMMON_INTEL_RECYCLE_MBUFS_H_
+#define _COMMON_INTEL_RECYCLE_MBUFS_H_
+
+#include <stdint.h>
+#include <unistd.h>
+
+#include <rte_mbuf.h>
+#include <rte_io.h>
+#include <ethdev_driver.h>
+
+#include "rx.h"
+#include "tx.h"
+
+/**
+ * Recycle mbufs for Rx queue.
+ *
+ * @param rxq Rx queue pointer
+ * @param nb_mbufs number of mbufs to recycle
+ * @param desc_len length of Rx descriptor
+ */
+static __rte_always_inline void
+ci_rx_recycle_mbufs(struct ci_rx_queue *rxq, const uint16_t nb_mbufs,
+		const size_t desc_len)
+{
+	struct ci_rx_entry *rxep;
+	volatile void *rxdp;
+	uint16_t rx_id;
+	uint16_t i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+	rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	for (i = 0; i < nb_mbufs; i++) {
+		volatile uint64_t *cur = RTE_PTR_ADD(rxdp, i * desc_len);
+
+#if RTE_IOVA_IN_MBUF
+		const uint64_t paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
+		const uint64_t dma_addr = rte_cpu_to_le_64(paddr);
+#else
+		const uint64_t vaddr = (uintptr_t)rxep[i].mbuf->buf_addr +
+			RTE_PKTMBUF_HEADROOM;
+		const uint64_t dma_addr = rte_cpu_to_le_64(vaddr);
+#endif
+
+		/* 8 bytes PBA followed by 8 bytes HBA */
+		*(cur + 1) = 0;
+		*cur = dma_addr;
+	}
+
+	/* Update the descriptor initializer index */
+	rxq->rxrearm_start += nb_mbufs;
+	rx_id = rxq->rxrearm_start - 1;
+
+	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+		rxq->rxrearm_start = 0;
+		rx_id = rxq->nb_rx_desc - 1;
+	}
+
+	rxq->rxrearm_nb -= nb_mbufs;
+
+	rte_io_wmb();
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc_relaxed(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif
diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
index aa7703216d..073357bee2 100644
--- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
@@ -10,43 +10,12 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 
+#include "../common/recycle_mbufs.h"
+
 void
 i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct ci_rx_queue *rxq = rx_queue;
-	struct ci_rx_entry *rxep;
-	volatile union i40e_rx_desc *rxdp;
-	uint16_t rx_id;
-	uint64_t paddr;
-	uint64_t dma_addr;
-	uint16_t i;
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-	rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	for (i = 0; i < nb_mbufs; i++) {
-		/* Initialize rxdp descs. */
-		paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr = rte_cpu_to_le_64(paddr);
-		/* flush desc with pa dma_addr */
-		rxdp[i].read.hdr_addr = 0;
-		rxdp[i].read.pkt_addr = dma_addr;
-	}
-
-	/* Update the descriptor initializer index */
-	rxq->rxrearm_start += nb_mbufs;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= nb_mbufs;
-
-	rte_io_wmb();
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	ci_rx_recycle_mbufs(rx_queue, nb_mbufs, sizeof(union i40e_rx_desc));
 }
 
 uint16_t
diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index 1df1787c7f..e2c3523ed2 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -8,41 +8,12 @@
 #include "ixgbe_ethdev.h"
 #include "ixgbe_rxtx.h"
 
+#include "../common/recycle_mbufs.h"
+
 void
 ixgbe_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct ci_rx_queue *rxq = rx_queue;
-	struct ci_rx_entry *rxep;
-	volatile union ixgbe_adv_rx_desc *rxdp;
-	uint16_t rx_id;
-	uint64_t paddr;
-	uint64_t dma_addr;
-	uint16_t i;
-
-	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
-	rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	for (i = 0; i < nb_mbufs; i++) {
-		/* Initialize rxdp descs. */
-		paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr = rte_cpu_to_le_64(paddr);
-		/* Flush descriptors with pa dma_addr */
-		rxdp[i].read.hdr_addr = 0;
-		rxdp[i].read.pkt_addr = dma_addr;
-	}
-
-	/* Update the descriptor initializer index */
-	rxq->rxrearm_start += nb_mbufs;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= nb_mbufs;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			(rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	ci_rx_recycle_mbufs(rx_queue, nb_mbufs, sizeof(union ixgbe_adv_rx_desc));
 }
 
 uint16_t
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v1 13/13] net/intel: add common Tx mbuf recycle
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (10 preceding siblings ...)
  2025-05-06 13:28 ` [PATCH v1 12/13] net/intel: add common Rx mbuf recycle Anatoly Burakov
@ 2025-05-06 13:28 ` Anatoly Burakov
  2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:28 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes, Vladimir Medvedkin

Currently, there are duplicate implementations of Tx mbuf recycle in some
drivers, specifically ixgbe and i40e. Move them into a common header.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/recycle_mbufs.h      | 98 +++++++++++++++++++
 drivers/net/intel/common/tx.h                 |  1 +
 .../i40e/i40e_recycle_mbufs_vec_common.c      | 88 +----------------
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    | 89 +----------------
 4 files changed, 107 insertions(+), 169 deletions(-)
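
A minimal sketch of one way a driver might wire up the new Tx helper (the
xyz_* names are hypothetical and not part of this patch): the driver keeps
its hardware-specific DD-bit check, as required by the helper's doc
comment, and delegates the mbuf accounting to the common code.

#include <stdbool.h>
#include <stdint.h>

#include <ethdev_driver.h>

#include "../common/recycle_mbufs.h"

/* hypothetical hardware-specific check (placeholder body): returns true
 * once the descriptor at the given index has its DD (descriptor done) bit
 * set by hardware */
static bool
xyz_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
{
	RTE_SET_USED(txq);
	RTE_SET_USED(idx);
	return true;
}

static uint16_t
xyz_recycle_tx_mbufs_reuse(void *tx_queue,
		struct rte_eth_recycle_rxq_info *recycle_rxq_info)
{
	struct ci_tx_queue *txq = tx_queue;

	/* the driver-specific DD-bit check must come first; the common
	 * helper itself does not inspect hardware descriptors */
	if (!xyz_tx_desc_done(txq, txq->tx_next_dd))
		return 0;

	return ci_tx_recycle_mbufs(txq, recycle_rxq_info);
}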

diff --git a/drivers/net/intel/common/recycle_mbufs.h b/drivers/net/intel/common/recycle_mbufs.h
index fd31c5c1ff..88779c5aa4 100644
--- a/drivers/net/intel/common/recycle_mbufs.h
+++ b/drivers/net/intel/common/recycle_mbufs.h
@@ -64,4 +64,102 @@ ci_rx_recycle_mbufs(struct ci_rx_queue *rxq, const uint16_t nb_mbufs,
 	rte_write32_wc_relaxed(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
 }
 
+/**
+ * Recycle buffers on Tx. Note: the function must first perform a driver-specific
+ * DD-bit-set check to ensure that the Tx descriptors are ready for recycling.
+ *
+ * @param txq Tx queue pointer
+ * @param recycle_rxq_info recycling mbuf information
+ *
+ * @return how many buffers were recycled
+ */
+static __rte_always_inline uint16_t
+ci_tx_recycle_mbufs(struct ci_tx_queue *txq,
+	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+	struct ci_tx_entry *txep;
+	struct rte_mbuf **rxep;
+	int i, n;
+	uint16_t nb_recycle_mbufs;
+	uint16_t avail = 0;
+	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
+	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
+	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
+	uint16_t refill_head = *recycle_rxq_info->refill_head;
+	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+
+	/* Get available recycling Rx buffers. */
+	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
+
+	/* Check Tx free thresh and Rx available space. */
+	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
+		return 0;
+
+	n = txq->tx_rs_thresh;
+	nb_recycle_mbufs = n;
+
+	/* Mbuf recycle mode can only be used when the ring buffer does not
+	 * wrap around. Two cases for this:
+	 *
+	 * case 1: The refill head of Rx buffer ring needs to be aligned with
+	 * mbuf ring size. In this case, the number of Tx freeing buffers
+	 * should be equal to refill_requirement.
+	 *
+	 * case 2: The refill head of Rx ring buffer does not need to be aligned
+	 * with mbuf ring size. In this case, the update of refill head can not
+	 * exceed the Rx mbuf ring size.
+	 */
+	if ((refill_requirement && refill_requirement != n) ||
+		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
+		return 0;
+
+	/* First buffer to free from S/W ring is at index
+	 * tx_next_dd - (tx_rs_thresh-1).
+	 */
+	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+	rxep = recycle_rxq_info->mbuf_ring;
+	rxep += refill_head;
+
+	/* is fast-free enabled in offloads? */
+	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+		/* Avoid txq containing buffers from unexpected mempool. */
+		if (unlikely(recycle_rxq_info->mp
+					!= txep[0].mbuf->pool))
+			return 0;
+
+		/* Directly put mbufs from Tx to Rx. */
+		for (i = 0; i < n; i++)
+			rxep[i] = txep[i].mbuf;
+	} else {
+		for (i = 0; i < n; i++) {
+			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+
+			/* If Tx buffers are not the last reference or from
+			 * unexpected mempool, previous copied buffers are
+			 * considered as invalid.
+			 */
+			if (unlikely(rxep[i] == NULL ||
+				recycle_rxq_info->mp != txep[i].mbuf->pool))
+				nb_recycle_mbufs = 0;
+		}
+		/* If Tx buffers are not the last reference or
+		 * from unexpected mempool, all recycled buffers
+		 * are put into mempool.
+		 */
+		if (nb_recycle_mbufs == 0)
+			for (i = 0; i < n; i++) {
+				if (rxep[i] != NULL)
+					rte_mempool_put(rxep[i]->pool, rxep[i]);
+			}
+	}
+
+	/* Update counters for Tx. */
+	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+	if (txq->tx_next_dd >= txq->nb_tx_desc)
+		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+	return nb_recycle_mbufs;
+}
+
 #endif
diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index c99bd5420f..cc70fa7db4 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -37,6 +37,7 @@ struct ci_tx_queue {
 		volatile struct ice_tx_desc *ice_tx_ring;
 		volatile struct idpf_base_tx_desc *idpf_tx_ring;
 		volatile union ixgbe_adv_tx_desc *ixgbe_tx_ring;
+		volatile void *tx_ring; /**< Generic. */
 	};
 	volatile uint8_t *qtx_tail;               /* register address of tail */
 	union {
diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
index 073357bee2..19edee781d 100644
--- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
@@ -23,92 +23,12 @@ i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
 	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
 	struct ci_tx_queue *txq = tx_queue;
-	struct ci_tx_entry *txep;
-	struct rte_mbuf **rxep;
-	int i, n;
-	uint16_t nb_recycle_mbufs;
-	uint16_t avail = 0;
-	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
-	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
-	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
-	uint16_t refill_head = *recycle_rxq_info->refill_head;
-	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+	const uint64_t ctob = txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz;
 
-	/* Get available recycling Rx buffers. */
-	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
-
-	/* Check Tx free thresh and Rx available space. */
-	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
-		return 0;
-
-	/* check DD bits on threshold descriptor */
-	if ((txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
-				rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+	/* are Tx descriptors ready for recycling? */
+	if ((ctob & rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
 			rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
 		return 0;
 
-	n = txq->tx_rs_thresh;
-	nb_recycle_mbufs = n;
-
-	/* Mbufs recycle mode can only support no ring buffer wrapping around.
-	 * Two case for this:
-	 *
-	 * case 1: The refill head of Rx buffer ring needs to be aligned with
-	 * mbuf ring size. In this case, the number of Tx freeing buffers
-	 * should be equal to refill_requirement.
-	 *
-	 * case 2: The refill head of Rx ring buffer does not need to be aligned
-	 * with mbuf ring size. In this case, the update of refill head can not
-	 * exceed the Rx mbuf ring size.
-	 */
-	if ((refill_requirement && refill_requirement != n) ||
-		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
-		return 0;
-
-	/* First buffer to free from S/W ring is at index
-	 * tx_next_dd - (tx_rs_thresh-1).
-	 */
-	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
-	rxep = recycle_rxq_info->mbuf_ring;
-	rxep += refill_head;
-
-	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
-		/* Avoid txq contains buffers from unexpected mempool. */
-		if (unlikely(recycle_rxq_info->mp
-					!= txep[0].mbuf->pool))
-			return 0;
-
-		/* Directly put mbufs from Tx to Rx. */
-		for (i = 0; i < n; i++)
-			rxep[i] = txep[i].mbuf;
-	} else {
-		for (i = 0; i < n; i++) {
-			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
-
-			/* If Tx buffers are not the last reference or from
-			 * unexpected mempool, previous copied buffers are
-			 * considered as invalid.
-			 */
-			if (unlikely(rxep[i] == NULL ||
-				recycle_rxq_info->mp != txep[i].mbuf->pool))
-				nb_recycle_mbufs = 0;
-		}
-		/* If Tx buffers are not the last reference or
-		 * from unexpected mempool, all recycled buffers
-		 * are put into mempool.
-		 */
-		if (nb_recycle_mbufs == 0)
-			for (i = 0; i < n; i++) {
-				if (rxep[i] != NULL)
-					rte_mempool_put(rxep[i]->pool, rxep[i]);
-			}
-	}
-
-	/* Update counters for Tx. */
-	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
-	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
-	if (txq->tx_next_dd >= txq->nb_tx_desc)
-		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-
-	return nb_recycle_mbufs;
+	return ci_tx_recycle_mbufs(txq, recycle_rxq_info);
 }
diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index e2c3523ed2..179205b422 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -21,92 +21,11 @@ ixgbe_recycle_tx_mbufs_reuse_vec(void *tx_queue,
 		struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
 	struct ci_tx_queue *txq = tx_queue;
-	struct ci_tx_entry *txep;
-	struct rte_mbuf **rxep;
-	int i, n;
-	uint32_t status;
-	uint16_t nb_recycle_mbufs;
-	uint16_t avail = 0;
-	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
-	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
-	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
-	uint16_t refill_head = *recycle_rxq_info->refill_head;
-	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+	const uint32_t status = txq->ixgbe_tx_ring[txq->tx_next_dd].wb.status;
 
-	/* Get available recycling Rx buffers. */
-	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
-
-	/* Check Tx free thresh and Rx available space. */
-	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
-		return 0;
-
-	/* check DD bits on threshold descriptor */
-	status = txq->ixgbe_tx_ring[txq->tx_next_dd].wb.status;
-	if (!(status & IXGBE_ADVTXD_STAT_DD))
-		return 0;
-
-	n = txq->tx_rs_thresh;
-	nb_recycle_mbufs = n;
-
-	/* Mbufs recycle can only support no ring buffer wrapping around.
-	 * Two case for this:
-	 *
-	 * case 1: The refill head of Rx buffer ring needs to be aligned with
-	 * buffer ring size. In this case, the number of Tx freeing buffers
-	 * should be equal to refill_requirement.
-	 *
-	 * case 2: The refill head of Rx ring buffer does not need to be aligned
-	 * with buffer ring size. In this case, the update of refill head can not
-	 * exceed the Rx buffer ring size.
-	 */
-	if ((refill_requirement && refill_requirement != n) ||
-		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
+	/* are Tx descriptors ready for recycling? */
+	if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
 		return 0;
 
-	/* First buffer to free from S/W ring is at index
-	 * tx_next_dd - (tx_rs_thresh-1).
-	 */
-	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
-	rxep = recycle_rxq_info->mbuf_ring;
-	rxep += refill_head;
-
-	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
-		/* Avoid txq contains buffers from unexpected mempool. */
-		if (unlikely(recycle_rxq_info->mp
-					!= txep[0].mbuf->pool))
-			return 0;
-
-		/* Directly put mbufs from Tx to Rx. */
-		for (i = 0; i < n; i++)
-			rxep[i] = txep[i].mbuf;
-	} else {
-		for (i = 0; i < n; i++) {
-			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
-
-			/* If Tx buffers are not the last reference or from
-			 * unexpected mempool, previous copied buffers are
-			 * considered as invalid.
-			 */
-			if (unlikely(rxep[i] == NULL ||
-				recycle_rxq_info->mp != txep[i].mbuf->pool))
-				nb_recycle_mbufs = 0;
-		}
-		/* If Tx buffers are not the last reference or
-		 * from unexpected mempool, all recycled buffers
-		 * are put into mempool.
-		 */
-		if (nb_recycle_mbufs == 0)
-			for (i = 0; i < n; i++) {
-				if (rxep[i] != NULL)
-					rte_mempool_put(rxep[i]->pool, rxep[i]);
-			}
-	}
-
-	/* Update counters for Tx. */
-	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
-	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
-	if (txq->tx_next_dd >= txq->nb_tx_desc)
-		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-
-	return nb_recycle_mbufs;
+	return ci_tx_recycle_mbufs(tx_queue, recycle_rxq_info);
 }
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (11 preceding siblings ...)
  2025-05-06 13:28 ` [PATCH v1 13/13] net/intel: add common Tx " Anatoly Burakov
@ 2025-05-12 10:58 ` Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
                     ` (11 more replies)
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
  14 siblings, 12 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 10:58 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin; +Cc: bruce.richardson

The `rdh` (read head) field in the `ixgbe_rx_queue` struct is not used
anywhere in the codebase, and can be removed.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/ixgbe/ixgbe_rxtx.c | 9 ++-------
 drivers/net/intel/ixgbe/ixgbe_rxtx.h | 1 -
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index 95c80ac1b8..0c07ce3186 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -3296,17 +3296,12 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	/*
 	 * Modified to setup VFRDT for Virtual Function
 	 */
-	if (ixgbe_is_vf(dev)) {
+	if (ixgbe_is_vf(dev))
 		rxq->rdt_reg_addr =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
-		rxq->rdh_reg_addr =
-			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
-	} else {
+	else
 		rxq->rdt_reg_addr =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
-		rxq->rdh_reg_addr =
-			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
-	}
 
 	rxq->rx_ring_phys_addr = rz->iova;
 	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index 641f982b01..20a5c5a0af 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -85,7 +85,6 @@ struct ixgbe_rx_queue {
 	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
 	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
 	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
-	volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
 	struct ixgbe_rx_entry *sw_ring; /**< address of RX software ring. */
 	struct ixgbe_scattered_rx_entry *sw_sc_ring; /**< address of scattered Rx software ring. */
 	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v2 02/13] net/iavf: make IPsec stats dynamically allocated
  2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
@ 2025-05-12 10:58   ` Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
                     ` (10 subsequent siblings)
  11 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 10:58 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin, Ian Stokes; +Cc: bruce.richardson

Currently, the stats structure is directly embedded in the queue structure.
We're about to move the iavf driver to a common Rx queue structure, so we can't
have driver-specific structures that aren't pointers inside the common queue
structure. To prepare, we replace the directly embedded stats structure with a
pointer to the stats structure.
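
A minimal sketch of why the pointer matters for the upcoming common structure
(struct names other than iavf_rx_queue_stats are illustrative only):

    /* embedded: the full definition of the stats type must be visible
     * wherever the queue structure is defined */
    struct rxq_with_embedded_stats {
        struct iavf_rx_queue_stats stats;
    };

    /* pointer: a forward declaration is enough, so a driver-agnostic queue
     * structure can carry it; memory is allocated at queue setup and freed
     * on queue release */
    struct iavf_rx_queue_stats;
    struct rxq_with_stats_pointer {
        struct iavf_rx_queue_stats *stats;
    };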

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/iavf/iavf_ethdev.c |  2 +-
 drivers/net/intel/iavf/iavf_rxtx.c   | 21 ++++++++++++++++++---
 drivers/net/intel/iavf/iavf_rxtx.h   |  2 +-
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/net/intel/iavf/iavf_ethdev.c b/drivers/net/intel/iavf/iavf_ethdev.c
index b3dacbef84..5babd587b3 100644
--- a/drivers/net/intel/iavf/iavf_ethdev.c
+++ b/drivers/net/intel/iavf/iavf_ethdev.c
@@ -1870,7 +1870,7 @@ iavf_dev_update_ipsec_xstats(struct rte_eth_dev *ethdev,
 		struct iavf_rx_queue *rxq;
 		struct iavf_ipsec_crypto_stats *stats;
 		rxq = (struct iavf_rx_queue *)ethdev->data->rx_queues[idx];
-		stats = &rxq->stats.ipsec_crypto;
+		stats = &rxq->stats->ipsec_crypto;
 		ips->icount += stats->icount;
 		ips->ibytes += stats->ibytes;
 		ips->ierrors.count += stats->ierrors.count;
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index 5411eb6897..d23d2df807 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -619,6 +619,18 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 		return -ENOMEM;
 	}
 
+	/* Allocate stats */
+	rxq->stats = rte_zmalloc_socket("iavf rxq stats",
+				 sizeof(struct iavf_rx_queue_stats),
+				 RTE_CACHE_LINE_SIZE,
+				 socket_id);
+	if (!rxq->stats) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for "
+			     "rx queue stats");
+		rte_free(rxq);
+		return -ENOMEM;
+	}
+
 	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
 		proto_xtr = vf->proto_xtr ? vf->proto_xtr[queue_idx] :
 				IAVF_PROTO_XTR_NONE;
@@ -677,6 +689,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 				   socket_id);
 	if (!rxq->sw_ring) {
 		PMD_INIT_LOG(ERR, "Failed to allocate memory for SW ring");
+		rte_free(rxq->stats);
 		rte_free(rxq);
 		return -ENOMEM;
 	}
@@ -693,6 +706,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	if (!mz) {
 		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for RX");
 		rte_free(rxq->sw_ring);
+		rte_free(rxq->stats);
 		rte_free(rxq);
 		return -ENOMEM;
 	}
@@ -1054,6 +1068,7 @@ iavf_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 	iavf_rxq_release_mbufs_ops[q->rel_mbufs_type].release_mbufs(q);
 	rte_free(q->sw_ring);
 	rte_memzone_free(q->mz);
+	rte_free(q->stats);
 	rte_free(q);
 }
 
@@ -1581,7 +1596,7 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
 		iavf_flex_rxd_to_vlan_tci(rxm, &rxd);
 		iavf_flex_rxd_to_ipsec_crypto_status(rxm, &rxd,
-				&rxq->stats.ipsec_crypto);
+				&rxq->stats->ipsec_crypto);
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, rxm, &rxd);
 		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
@@ -1750,7 +1765,7 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
 		iavf_flex_rxd_to_vlan_tci(first_seg, &rxd);
 		iavf_flex_rxd_to_ipsec_crypto_status(first_seg, &rxd,
-				&rxq->stats.ipsec_crypto);
+				&rxq->stats->ipsec_crypto);
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, first_seg, &rxd);
 		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
@@ -2034,7 +2049,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 				rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
 			iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]);
 			iavf_flex_rxd_to_ipsec_crypto_status(mb, &rxdp[j],
-				&rxq->stats.ipsec_crypto);
+				&rxq->stats->ipsec_crypto);
 			rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);
 			stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
 			pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index 0b5d67e718..62b5a67c84 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -268,7 +268,7 @@ struct iavf_rx_queue {
 	uint8_t proto_xtr; /* protocol extraction type */
 	uint64_t xtr_ol_flag;
 		/* flexible descriptor metadata extraction offload flag */
-	struct iavf_rx_queue_stats stats;
+	struct iavf_rx_queue_stats *stats;
 	uint64_t offloads;
 	uint64_t phc_time;
 	uint64_t hw_time_update;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v2 03/13] net/ixgbe: create common Rx queue structure
  2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
@ 2025-05-12 10:58   ` Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 04/13] net/i40e: use the " Anatoly Burakov
                     ` (9 subsequent siblings)
  11 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 10:58 UTC (permalink / raw)
  To: dev, Bruce Richardson, Vladimir Medvedkin

In preparation for the deduplication effort, generalize the Rx queue structure.

Most of the fields are simply moved to common/rx.h, with comments clarified
where necessary. In some instances, fields are renamed during the move to make
them more consistent with the rest of the codebase.

Specifically, the following fields are renamed:

- rdt_reg_addr -> qrx_tail (Rx ring tail register address)
- rx_using_sse -> vector_rx (indicates if vectorized path is enabled)
- mb_pool -> mp (other drivers use this name)

Additionally, some per-driver defines are moved to the aforementioned common
Rx header and re-defined in the drivers in terms of the common values.
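
As an aside, the union members are what make it cheap for the follow-up
patches to plug further drivers into the same structure. A rough sketch of
the extension pattern (the "other driver" members are illustrative only; the
real ones are added by later patches in this series):

    struct ci_rx_queue_sketch {
        struct rte_mempool *mp;                /* common: mbuf pool */
        union { /* per-driver Rx ring pointer */
            volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
            volatile void *other_rx_ring;      /* hypothetical new driver */
        };
        volatile uint8_t *qrx_tail;            /* was rdt_reg_addr in ixgbe */
        bool vector_rx;                        /* was rx_using_sse in ixgbe */
        union { /* per-driver extra state */
            struct { /* ixgbe specific values */
                uint64_t vlan_flags;
            };
            struct { /* hypothetical new driver values */
                uint32_t other_queue_flag;
            };
        };
    };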

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h                 |  62 ++++++++
 drivers/net/intel/ixgbe/ixgbe_ethdev.c        |   8 +-
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    |   8 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx.c          | 149 +++++++++---------
 drivers/net/intel/ixgbe/ixgbe_rxtx.h          |  67 +-------
 .../net/intel/ixgbe/ixgbe_rxtx_vec_common.h   |   4 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c |  22 +--
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c  |  22 +--
 8 files changed, 172 insertions(+), 170 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index abb01ba5e7..524de39f9c 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -11,6 +11,68 @@
 #include <rte_ethdev.h>
 
 #define CI_RX_BURST 32
+#define CI_RX_MAX_BURST 32
+
+struct ci_rx_queue;
+
+struct ci_rx_entry {
+	struct rte_mbuf *mbuf; /* mbuf associated with RX descriptor. */
+};
+
+struct ci_rx_entry_sc {
+	struct rte_mbuf *fbuf; /* First segment of the fragmented packet.*/
+};
+
+/**
+ * Structure associated with each RX queue.
+ */
+struct ci_rx_queue {
+	struct rte_mempool  *mp; /**< mbuf pool to populate RX ring. */
+	union { /* RX ring virtual address */
+		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
+	};
+	volatile uint8_t *qrx_tail;   /**< register address of tail */
+	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
+	struct ci_rx_entry_sc *sw_sc_ring; /**< address of scattered Rx software ring. */
+	rte_iova_t rx_ring_phys_addr; /**< RX ring DMA address. */
+	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
+	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
+	/** hold packets to return to application */
+	struct rte_mbuf *rx_stage[CI_RX_MAX_BURST * 2];
+	uint16_t nb_rx_desc; /**< number of RX descriptors. */
+	uint16_t rx_tail;  /**< current value of tail register. */
+	uint16_t rx_nb_avail; /**< nr of staged pkts ready to ret to app */
+	uint16_t nb_rx_hold; /**< number of held free RX desc. */
+	uint16_t rx_next_avail; /**< idx of next staged pkt to ret to app */
+	uint16_t rx_free_thresh; /**< max free RX desc to hold. */
+	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
+	uint16_t rxrearm_nb;     /**< number of remaining to be re-armed */
+	uint16_t rxrearm_start;  /**< the idx we start the re-arming from */
+	uint16_t queue_id; /**< RX queue index. */
+	uint16_t port_id;  /**< Device port identifier. */
+	uint16_t reg_idx;  /**< RX queue register index. */
+	uint8_t crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
+	bool rx_deferred_start; /**< queue is not started on dev start. */
+	bool vector_rx; /**< indicates that vector RX is in use */
+	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
+	uint64_t mbuf_initializer; /**< value to init mbufs */
+	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
+	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
+	struct rte_mbuf fake_mbuf;
+	const struct rte_memzone *mz;
+	union {
+		struct { /* ixgbe specific values */
+			/** indicates that IPsec RX feature is in use */
+			uint8_t using_ipsec;
+			/** Packet type mask for different NICs. */
+			uint16_t pkt_type_mask;
+			/** UDP frames with a 0 checksum can be marked as checksum errors. */
+			uint8_t rx_udp_csum_zero_err;
+			/** flags to set in mbuf when a vlan is detected. */
+			uint64_t vlan_flags;
+		};
+	};
+};
 
 static inline uint16_t
 ci_rx_reassemble_packets(struct rte_mbuf **rx_bufs, uint16_t nb_bufs, uint8_t *split_flags,
diff --git a/drivers/net/intel/ixgbe/ixgbe_ethdev.c b/drivers/net/intel/ixgbe/ixgbe_ethdev.c
index f1fd271a0a..df1eecc3c1 100644
--- a/drivers/net/intel/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/intel/ixgbe/ixgbe_ethdev.c
@@ -2022,7 +2022,7 @@ ixgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev, uint16_t queue, bool on)
 {
 	struct ixgbe_hwstrip *hwstrip =
 		IXGBE_DEV_PRIVATE_TO_HWSTRIP_BITMAP(dev->data->dev_private);
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	if (queue >= IXGBE_MAX_RX_QUEUE_NUM)
 		return;
@@ -2157,7 +2157,7 @@ ixgbe_vlan_hw_strip_config(struct rte_eth_dev *dev)
 	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
 	uint32_t ctrl;
 	uint16_t i;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	bool on;
 
 	PMD_INIT_FUNC_TRACE();
@@ -2200,7 +2200,7 @@ ixgbe_config_vlan_strip_on_all_queues(struct rte_eth_dev *dev, int mask)
 {
 	uint16_t i;
 	struct rte_eth_rxmode *rxmode;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	if (mask & RTE_ETH_VLAN_STRIP_MASK) {
 		rxmode = &dev->data->dev_conf.rxmode;
@@ -5789,7 +5789,7 @@ ixgbevf_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on)
 static int
 ixgbevf_vlan_offload_config(struct rte_eth_dev *dev, int mask)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t i;
 	int on = 0;
 
diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index c1b086ef6d..1df1787c7f 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -11,15 +11,15 @@
 void
 ixgbe_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
-	struct ixgbe_rx_entry *rxep;
+	struct ci_rx_queue *rxq = rx_queue;
+	struct ci_rx_entry *rxep;
 	volatile union ixgbe_adv_rx_desc *rxdp;
 	uint16_t rx_id;
 	uint64_t paddr;
 	uint64_t dma_addr;
 	uint16_t i;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
 	rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
 	for (i = 0; i < nb_mbufs; i++) {
@@ -42,7 +42,7 @@ ixgbe_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 			(rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
+	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
 }
 
 uint16_t
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index 0c07ce3186..4e4afd81e4 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -1423,11 +1423,11 @@ int
 ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = &rxq->ixgbe_rx_ring[desc];
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.upper.status_error;
 
@@ -1567,10 +1567,10 @@ rx_desc_error_to_pkt_flags(uint32_t rx_status, uint16_t pkt_info,
 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
 #endif
 static inline int
-ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_scan_hw_ring(struct ci_rx_queue *rxq)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t pkt_len;
 	uint64_t pkt_flags;
@@ -1582,7 +1582,7 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
 	uint64_t vlan_flags = rxq->vlan_flags;
 
 	/* get references to current descriptor and S/W ring entry */
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = &rxq->ixgbe_rx_ring[rxq->rx_tail];
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	status = rxdp->wb.upper.status_error;
@@ -1667,10 +1667,10 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
 }
 
 static inline int
-ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
+ixgbe_rx_alloc_bufs(struct ci_rx_queue *rxq, bool reset_mbuf)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx;
 	__le64 dma_addr;
@@ -1679,12 +1679,12 @@ ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
 	/* allocate buffers in bulk directly into the S/W ring */
 	alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
 	rxep = &rxq->sw_ring[alloc_idx];
-	diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
+	diag = rte_mempool_get_bulk(rxq->mp, (void *)rxep,
 				    rxq->rx_free_thresh);
 	if (unlikely(diag != 0))
 		return -ENOMEM;
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = &rxq->ixgbe_rx_ring[alloc_idx];
 	for (i = 0; i < rxq->rx_free_thresh; ++i) {
 		/* populate the static rte mbuf fields */
 		mb = rxep[i].mbuf;
@@ -1711,7 +1711,7 @@ ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
 }
 
 static inline uint16_t
-ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+ixgbe_rx_fill_from_stage(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			 uint16_t nb_pkts)
 {
 	struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
@@ -1735,7 +1735,7 @@ static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	     uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	uint16_t nb_rx = 0;
 
 	/* Any previously recv'd pkts will be returned from the Rx stage */
@@ -1778,8 +1778,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 		/* update tail pointer */
 		rte_wmb();
-		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
-					    cur_free_trigger);
+		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->qrx_tail, cur_free_trigger);
 	}
 
 	if (rxq->rx_tail >= rxq->nb_rx_desc)
@@ -1825,11 +1824,11 @@ uint16_t
 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	volatile union ixgbe_adv_rx_desc *rx_ring;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *sw_ring;
-	struct ixgbe_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
 	union ixgbe_adv_rx_desc rxd;
@@ -1847,7 +1846,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
+	rx_ring = rxq->ixgbe_rx_ring;
 	sw_ring = rxq->sw_ring;
 	vlan_flags = rxq->vlan_flags;
 	while (nb_rx < nb_pkts) {
@@ -1908,7 +1907,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   (unsigned) rx_id, (unsigned) staterr,
 			   (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
 
-		nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (nmb == NULL) {
 			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
 				   "queue_id=%u", (unsigned) rxq->port_id,
@@ -2017,7 +2016,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   (unsigned) nb_rx);
 		rx_id = (uint16_t) ((rx_id == 0) ?
 				     (rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
+		IXGBE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 		nb_hold = 0;
 	}
 	rxq->nb_rx_hold = nb_hold;
@@ -2052,7 +2051,7 @@ static inline void
 ixgbe_fill_cluster_head_buf(
 	struct rte_mbuf *head,
 	union ixgbe_adv_rx_desc *desc,
-	struct ixgbe_rx_queue *rxq,
+	struct ci_rx_queue *rxq,
 	uint32_t staterr)
 {
 	uint32_t pkt_info;
@@ -2114,10 +2113,10 @@ static inline uint16_t
 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 		    bool bulk_alloc)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
-	volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
-	struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
-	struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ixgbe_adv_rx_desc *rx_ring = rxq->ixgbe_rx_ring;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry_sc *sw_sc_ring = rxq->sw_sc_ring;
 	uint16_t rx_id = rxq->rx_tail;
 	uint16_t nb_rx = 0;
 	uint16_t nb_hold = rxq->nb_rx_hold;
@@ -2125,10 +2124,10 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 
 	while (nb_rx < nb_pkts) {
 		bool eop;
-		struct ixgbe_rx_entry *rxe;
-		struct ixgbe_scattered_rx_entry *sc_entry;
-		struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
-		struct ixgbe_rx_entry *next_rxe = NULL;
+		struct ci_rx_entry *rxe;
+		struct ci_rx_entry_sc *sc_entry;
+		struct ci_rx_entry_sc *next_sc_entry = NULL;
+		struct ci_rx_entry *next_rxe = NULL;
 		struct rte_mbuf *first_seg;
 		struct rte_mbuf *rxm;
 		struct rte_mbuf *nmb = NULL;
@@ -2165,7 +2164,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 			   rte_le_to_cpu_16(rxd.wb.upper.length));
 
 		if (!bulk_alloc) {
-			nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
+			nmb = rte_mbuf_raw_alloc(rxq->mp);
 			if (nmb == NULL) {
 				PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
 						  "port_id=%u queue_id=%u",
@@ -2181,7 +2180,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 			if (!ixgbe_rx_alloc_bufs(rxq, false)) {
 				rte_wmb();
 				IXGBE_PCI_REG_WC_WRITE_RELAXED(
-							rxq->rdt_reg_addr,
+							rxq->qrx_tail,
 							next_rdt);
 				nb_hold -= rxq->rx_free_thresh;
 			} else {
@@ -2347,7 +2346,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 			   rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
 
 		rte_wmb();
-		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
+		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->qrx_tail, prev_id);
 		nb_hold = 0;
 	}
 
@@ -2969,12 +2968,12 @@ ixgbe_free_sc_cluster(struct rte_mbuf *m)
 }
 
 static void __rte_cold
-ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release_mbufs(struct ci_rx_queue *rxq)
 {
 	unsigned i;
 
 	/* SSE Vector driver has a different way of releasing mbufs. */
-	if (rxq->rx_using_sse) {
+	if (rxq->vector_rx) {
 		ixgbe_rx_queue_release_mbufs_vec(rxq);
 		return;
 	}
@@ -3006,7 +3005,7 @@ ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
 }
 
 static void __rte_cold
-ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release(struct ci_rx_queue *rxq)
 {
 	if (rxq != NULL) {
 		ixgbe_rx_queue_release_mbufs(rxq);
@@ -3032,7 +3031,7 @@ ixgbe_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
  *           function must be used.
  */
 static inline int __rte_cold
-check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
+check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
 {
 	int ret = 0;
 
@@ -3069,7 +3068,7 @@ check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
 
 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
 static void __rte_cold
-ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
+ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ci_rx_queue *rxq)
 {
 	static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
 	unsigned i;
@@ -3090,7 +3089,7 @@ ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
 	 * reads extra memory as zeros.
 	 */
 	for (i = 0; i < len; i++) {
-		rxq->rx_ring[i] = zeroed_desc;
+		rxq->ixgbe_rx_ring[i] = zeroed_desc;
 	}
 
 	/*
@@ -3205,7 +3204,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 			 struct rte_mempool *mp)
 {
 	const struct rte_memzone *rz;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ixgbe_hw     *hw;
 	uint16_t len;
 	struct ixgbe_adapter *adapter = dev->data->dev_private;
@@ -3234,11 +3233,11 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	}
 
 	/* First allocate the rx queue data structure */
-	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
+	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE, socket_id);
 	if (rxq == NULL)
 		return -ENOMEM;
-	rxq->mb_pool = mp;
+	rxq->mp = mp;
 	rxq->nb_rx_desc = nb_desc;
 	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
 	rxq->queue_id = queue_idx;
@@ -3297,14 +3296,14 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	 * Modified to setup VFRDT for Virtual Function
 	 */
 	if (ixgbe_is_vf(dev))
-		rxq->rdt_reg_addr =
+		rxq->qrx_tail =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
 	else
-		rxq->rdt_reg_addr =
+		rxq->qrx_tail =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
 
 	rxq->rx_ring_phys_addr = rz->iova;
-	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
+	rxq->ixgbe_rx_ring = (union ixgbe_adv_rx_desc *)rz->addr;
 
 	/*
 	 * Certain constraints must be met in order to use the bulk buffer
@@ -3329,7 +3328,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 		len += RTE_PMD_IXGBE_RX_MAX_BURST;
 
 	rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
-					  sizeof(struct ixgbe_rx_entry) * len,
+					  sizeof(struct ci_rx_entry) * len,
 					  RTE_CACHE_LINE_SIZE, socket_id);
 	if (!rxq->sw_ring) {
 		ixgbe_rx_queue_release(rxq);
@@ -3346,7 +3345,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	 */
 	rxq->sw_sc_ring =
 		rte_zmalloc_socket("rxq->sw_sc_ring",
-				   sizeof(struct ixgbe_scattered_rx_entry) * len,
+				   sizeof(struct ci_rx_entry_sc) * len,
 				   RTE_CACHE_LINE_SIZE, socket_id);
 	if (!rxq->sw_sc_ring) {
 		ixgbe_rx_queue_release(rxq);
@@ -3355,7 +3354,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 
 	PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
 			    "dma_addr=0x%"PRIx64,
-		     rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
+		     rxq->sw_ring, rxq->sw_sc_ring, rxq->ixgbe_rx_ring,
 		     rxq->rx_ring_phys_addr);
 
 	if (!rte_is_power_of_2(nb_desc)) {
@@ -3379,11 +3378,11 @@ ixgbe_dev_rx_queue_count(void *rx_queue)
 {
 #define IXGBE_RXQ_SCAN_INTERVAL 4
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &(rxq->rx_ring[rxq->rx_tail]);
+	rxdp = &rxq->ixgbe_rx_ring[rxq->rx_tail];
 
 	while ((desc < rxq->nb_rx_desc) &&
 		(rxdp->wb.upper.status_error &
@@ -3391,7 +3390,7 @@ ixgbe_dev_rx_queue_count(void *rx_queue)
 		desc += IXGBE_RXQ_SCAN_INTERVAL;
 		rxdp += IXGBE_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
+			rxdp = &(rxq->ixgbe_rx_ring[rxq->rx_tail +
 				desc - rxq->nb_rx_desc]);
 	}
 
@@ -3401,7 +3400,7 @@ ixgbe_dev_rx_queue_count(void *rx_queue)
 int
 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile uint32_t *status;
 	uint32_t nb_hold, desc;
 
@@ -3409,7 +3408,7 @@ ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 		return -EINVAL;
 
 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
-	if (rxq->rx_using_sse)
+	if (rxq->vector_rx)
 		nb_hold = rxq->rxrearm_nb;
 	else
 #endif
@@ -3421,7 +3420,7 @@ ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	status = &rxq->rx_ring[desc].wb.upper.status_error;
+	status = &rxq->ixgbe_rx_ring[desc].wb.upper.status_error;
 	if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
 		return RTE_ETH_RX_DESC_DONE;
 
@@ -3506,7 +3505,7 @@ ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
 	}
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 
 		if (rxq != NULL) {
 			ixgbe_rx_queue_release_mbufs(rxq);
@@ -4668,16 +4667,16 @@ ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
 }
 
 static int __rte_cold
-ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
+ixgbe_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq)
 {
-	struct ixgbe_rx_entry *rxe = rxq->sw_ring;
+	struct ci_rx_entry *rxe = rxq->sw_ring;
 	uint64_t dma_addr;
 	unsigned int i;
 
 	/* Initialize software ring entries */
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
 		volatile union ixgbe_adv_rx_desc *rxd;
-		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
+		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mp);
 
 		if (mbuf == NULL) {
 			PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
@@ -4690,7 +4689,7 @@ ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
 
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
-		rxd = &rxq->rx_ring[i];
+		rxd = &rxq->ixgbe_rx_ring[i];
 		rxd->read.hdr_addr = 0;
 		rxd->read.pkt_addr = dma_addr;
 		rxe[i].mbuf = mbuf;
@@ -5109,9 +5108,9 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 		dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 
-		rxq->rx_using_sse = rx_using_sse;
+		rxq->vector_rx = rx_using_sse;
 #ifdef RTE_LIB_SECURITY
 		rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
 				RTE_ETH_RX_OFFLOAD_SECURITY);
@@ -5187,7 +5186,7 @@ ixgbe_set_rsc(struct rte_eth_dev *dev)
 
 	/* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		uint32_t srrctl =
 			IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
 		uint32_t rscctl =
@@ -5217,7 +5216,7 @@ ixgbe_set_rsc(struct rte_eth_dev *dev)
 		 */
 
 		rscctl |= IXGBE_RSCCTL_RSCEN;
-		rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
+		rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mp);
 		psrtype |= IXGBE_PSRTYPE_TCPHDR;
 
 		/*
@@ -5263,7 +5262,7 @@ int __rte_cold
 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint64_t bus_addr;
 	uint32_t rxctrl;
 	uint32_t fctrl;
@@ -5374,7 +5373,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
 		 * The value is in 1 KB resolution. Valid values can be from
 		 * 1 KB to 16 KB.
 		 */
-		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
+		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
 			RTE_PKTMBUF_HEADROOM);
 		srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
 			   IXGBE_SRRCTL_BSIZEPKT_MASK);
@@ -5559,7 +5558,7 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
 	struct ci_tx_queue *txq;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t txdctl;
 	uint32_t dmatxctl;
 	uint32_t rxctrl;
@@ -5646,7 +5645,7 @@ int __rte_cold
 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ixgbe_hw     *hw;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t rxdctl;
 	int poll_ms;
 
@@ -5689,7 +5688,7 @@ ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ixgbe_hw     *hw;
 	struct ixgbe_adapter *adapter = dev->data->dev_private;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t rxdctl;
 	int poll_ms;
 
@@ -5823,11 +5822,11 @@ void
 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
-	qinfo->mp = rxq->mb_pool;
+	qinfo->mp = rxq->mp;
 	qinfo->scattered_rx = dev->data->scattered_rx;
 	qinfo->nb_desc = rxq->nb_rx_desc;
 
@@ -5861,13 +5860,13 @@ void
 ixgbe_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ixgbe_adapter *adapter = dev->data->dev_private;
 
 	rxq = dev->data->rx_queues[queue_id];
 
 	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
-	recycle_rxq_info->mp = rxq->mb_pool;
+	recycle_rxq_info->mp = rxq->mp;
 	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
 	recycle_rxq_info->receive_tail = &rxq->rx_tail;
 
@@ -5889,7 +5888,7 @@ int __rte_cold
 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
 	uint32_t frame_size = dev->data->mtu + IXGBE_ETH_OVERHEAD;
 	uint64_t bus_addr;
@@ -5972,7 +5971,7 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		 * The value is in 1 KB resolution. Valid values can be from
 		 * 1 KB to 16 KB.
 		 */
-		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
+		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
 			RTE_PKTMBUF_HEADROOM);
 		srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
 			   IXGBE_SRRCTL_BSIZEPKT_MASK);
@@ -6076,7 +6075,7 @@ ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
 	struct ci_tx_queue *txq;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t txdctl;
 	uint32_t rxdctl;
 	uint16_t i;
@@ -6270,7 +6269,7 @@ ixgbe_recv_scattered_pkts_vec(
 }
 
 int
-ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
+ixgbe_rxq_vec_setup(struct ci_rx_queue __rte_unused *rxq)
 {
 	return -1;
 }
@@ -6290,7 +6289,7 @@ ixgbe_txq_vec_setup(struct ci_tx_queue *txq __rte_unused)
 }
 
 void
-ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
+ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue __rte_unused *rxq)
 {
 	return;
 }
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index 20a5c5a0af..84e28eb254 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -5,6 +5,7 @@
 #ifndef _IXGBE_RXTX_H_
 #define _IXGBE_RXTX_H_
 
+#include "../common/rx.h"
 #include "../common/tx.h"
 
 /*
@@ -30,7 +31,7 @@
 #define	IXGBE_MAX_RING_DESC	8192
 
 #define RTE_PMD_IXGBE_TX_MAX_BURST 32
-#define RTE_PMD_IXGBE_RX_MAX_BURST 32
+#define RTE_PMD_IXGBE_RX_MAX_BURST CI_RX_MAX_BURST
 #define RTE_IXGBE_TX_MAX_FREE_BUF_SZ 64
 
 #define RTE_IXGBE_DESCS_PER_LOOP    4
@@ -66,66 +67,6 @@
 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
 #define IXGBE_PACKET_TYPE_SHIFT             0X04
 
-/**
- * Structure associated with each descriptor of the RX ring of a RX queue.
- */
-struct ixgbe_rx_entry {
-	struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
-};
-
-struct ixgbe_scattered_rx_entry {
-	struct rte_mbuf *fbuf; /**< First segment of the fragmented packet. */
-};
-
-/**
- * Structure associated with each RX queue.
- */
-struct ixgbe_rx_queue {
-	struct rte_mempool  *mb_pool; /**< mbuf pool to populate RX ring. */
-	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
-	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
-	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
-	struct ixgbe_rx_entry *sw_ring; /**< address of RX software ring. */
-	struct ixgbe_scattered_rx_entry *sw_sc_ring; /**< address of scattered Rx software ring. */
-	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
-	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
-	uint64_t            mbuf_initializer; /**< value to init mbufs */
-	uint16_t            nb_rx_desc; /**< number of RX descriptors. */
-	uint16_t            rx_tail;  /**< current value of RDT register. */
-	uint16_t            nb_rx_hold; /**< number of held free RX desc. */
-	uint16_t rx_nb_avail; /**< nr of staged pkts ready to ret to app */
-	uint16_t rx_next_avail; /**< idx of next staged pkt to ret to app */
-	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	uint8_t            rx_using_sse;
-	/**< indicates that vector RX is in use */
-#ifdef RTE_LIB_SECURITY
-	uint8_t            using_ipsec;
-	/**< indicates that IPsec RX feature is in use */
-#endif
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
-	uint16_t            rxrearm_nb;     /**< number of remaining to be re-armed */
-	uint16_t            rxrearm_start;  /**< the idx we start the re-arming from */
-#endif
-	uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
-	uint16_t            queue_id; /**< RX queue index. */
-	uint16_t            reg_idx;  /**< RX queue register index. */
-	uint16_t            pkt_type_mask;  /**< Packet type mask for different NICs. */
-	uint16_t            port_id;  /**< Device port identifier. */
-	uint8_t             crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
-	uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
-	uint8_t             rx_deferred_start; /**< not in global dev start. */
-	/** UDP frames with a 0 checksum can be marked as checksum errors. */
-	uint8_t             rx_udp_csum_zero_err;
-	/** flags to set in mbuf when a vlan is detected. */
-	uint64_t            vlan_flags;
-	uint64_t	    offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
-	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
-	struct rte_mbuf fake_mbuf;
-	/** hold packets to return to application */
-	struct rte_mbuf *rx_stage[RTE_PMD_IXGBE_RX_MAX_BURST*2];
-	const struct rte_memzone *mz;
-};
-
 /**
  * IXGBE CTX Constants
  */
@@ -230,8 +171,8 @@ uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 uint16_t ixgbe_recv_scattered_pkts_vec(void *rx_queue,
 		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
 int ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);
-int ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq);
-void ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq);
+int ixgbe_rxq_vec_setup(struct ci_rx_queue *rxq);
+void ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq);
 int ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt);
 
 extern const uint32_t ptype_table[IXGBE_PACKET_TYPE_MAX];
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
index 018010820f..0ba3d7a4c0 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
@@ -69,7 +69,7 @@ ixgbe_tx_free_bufs(struct ci_tx_queue *txq)
 }
 
 static inline void
-_ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
+_ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	unsigned int i;
 
@@ -173,7 +173,7 @@ ixgbe_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
 		return -1;
 
 	for (uint16_t i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		if (!rxq)
 			continue;
 		if (!ci_rxq_vec_capable(rxq->nb_rx_desc, rxq->rx_free_thresh, rxq->offloads))
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
index 9ccd8eba25..630a2e6a1d 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -12,22 +12,22 @@
 #include "ixgbe_rxtx_vec_common.h"
 
 static inline void
-ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	uint64x2_t dma_addr0, dma_addr1;
 	uint64x2_t zero = vdupq_n_u64(0);
 	uint64_t paddr;
 	uint8x8_t p;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mb_pool,
+	if (unlikely(rte_mempool_get_bulk(rxq->mp,
 					  (void *)rxep,
 					  RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
 		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
@@ -76,7 +76,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
+	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
 }
 
 static inline void
@@ -282,11 +282,11 @@ desc_to_ptype_v(uint64x2_t descs[4], uint16_t pkt_type_mask,
  * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint8x16_t shuf_msk = {
@@ -309,7 +309,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rx_tail;
 
 	rte_prefetch_non_temporal(rxdp);
 
@@ -488,7 +488,7 @@ static uint16_t
 ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -634,7 +634,7 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_ixgbe_rx_queue_release_mbufs_vec(rxq);
 }
@@ -657,7 +657,7 @@ static const struct ixgbe_txq_ops vec_txq_ops = {
 };
 
 int __rte_cold
-ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
index e125f52cc5..ecfb0d6ba6 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -13,12 +13,12 @@
 #include <rte_vect.h>
 
 static inline void
-ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
@@ -26,10 +26,10 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 
 	const __m128i hba_msk = _mm_set_epi64x(0, UINT64_MAX);
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mb_pool,
+	if (rte_mempool_get_bulk(rxq->mp,
 				 (void *)rxep,
 				 RTE_IXGBE_RXQ_REARM_THRESH) < 0) {
 		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
@@ -86,7 +86,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
+	IXGBE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 }
 
 #ifdef RTE_LIB_SECURITY
@@ -327,11 +327,11 @@ desc_to_ptype_v(__m128i descs[4], uint16_t pkt_type_mask,
  * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 #ifdef RTE_LIB_SECURITY
 	uint8_t use_ipsec = rxq->using_ipsec;
@@ -377,7 +377,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rx_tail;
 
 	rte_prefetch0(rxdp);
 
@@ -609,7 +609,7 @@ static uint16_t
 ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -755,7 +755,7 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_ixgbe_rx_queue_release_mbufs_vec(rxq);
 }
@@ -778,7 +778,7 @@ static const struct ixgbe_txq_ops vec_txq_ops = {
 };
 
 int __rte_cold
-ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v2 04/13] net/i40e: use the common Rx queue structure
  2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
@ 2025-05-12 10:58   ` Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 05/13] net/ice: " Anatoly Burakov
                     ` (8 subsequent siblings)
  11 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 10:58 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes

Make the i40e driver use the new common Rx queue structure.

Because the i40e driver supports both 16-byte and 32-byte descriptor
formats (controlled by the RTE_LIBRTE_I40E_16BYTE_RX_DESC define), the
common queue structure has to account for that, so it carries ring
pointers for both formats, while the actual format is picked by i40e at
compile time using the above macro. Direct accesses to the Rx ring are
therefore replaced with macro-based accesses that take the descriptor
size into account.
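
To make the intent concrete, the access pattern introduced here boils
down to the following condensed sketch (the macros match what the
i40e_rxtx.h hunk below defines; the usage line is representative of the
call sites converted in this patch):

	/* Pick the ring member matching the compile-time descriptor format. */
	#ifdef RTE_LIBRTE_I40E_16BYTE_RX_DESC
	#define I40E_RX_RING(rxq)	((rxq)->i40e_rx_16b_ring)
	#else
	#define I40E_RX_RING(rxq)	((rxq)->i40e_rx_32b_ring)
	#endif
	#define I40E_RX_RING_PTR(rxq, entry)	(I40E_RX_RING(rxq) + (entry))

	/* Callers then index the ring via the macro instead of rxq->rx_ring: */
	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);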

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h                 |  14 ++
 drivers/net/intel/i40e/i40e_ethdev.c          |   4 +-
 drivers/net/intel/i40e/i40e_ethdev.h          |   4 +-
 drivers/net/intel/i40e/i40e_fdir.c            |  16 +--
 .../i40e/i40e_recycle_mbufs_vec_common.c      |   6 +-
 drivers/net/intel/i40e/i40e_rxtx.c            | 126 +++++++++---------
 drivers/net/intel/i40e/i40e_rxtx.h            |  74 +++-------
 drivers/net/intel/i40e/i40e_rxtx_common_avx.h |   6 +-
 .../net/intel/i40e/i40e_rxtx_vec_altivec.c    |  22 +--
 drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |  12 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |  12 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_common.h |   4 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   |  24 ++--
 drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    |  24 ++--
 14 files changed, 160 insertions(+), 188 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index 524de39f9c..db49db57d0 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -30,6 +30,8 @@ struct ci_rx_queue {
 	struct rte_mempool  *mp; /**< mbuf pool to populate RX ring. */
 	union { /* RX ring virtual address */
 		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
+		volatile union i40e_16byte_rx_desc *i40e_rx_16b_ring;
+		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
@@ -51,14 +53,22 @@ struct ci_rx_queue {
 	uint16_t queue_id; /**< RX queue index. */
 	uint16_t port_id;  /**< Device port identifier. */
 	uint16_t reg_idx;  /**< RX queue register index. */
+	uint16_t rx_buf_len; /* The packet buffer size */
+	uint16_t rx_hdr_len; /* The header buffer size */
+	uint16_t max_pkt_len; /* Maximum packet length */
 	uint8_t crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
+	bool q_set; /**< indicate if rx queue has been configured */
 	bool rx_deferred_start; /**< queue is not started on dev start. */
+	bool fdir_enabled; /* 0 if FDIR disabled, 1 when enabled */
 	bool vector_rx; /**< indicates that vector RX is in use */
 	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
 	uint64_t mbuf_initializer; /**< value to init mbufs */
 	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
 	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
 	struct rte_mbuf fake_mbuf;
+	union { /* the VSI this queue belongs to */
+		struct i40e_vsi *i40e_vsi;
+	};
 	const struct rte_memzone *mz;
 	union {
 		struct { /* ixgbe specific values */
@@ -71,6 +81,10 @@ struct ci_rx_queue {
 			/** flags to set in mbuf when a vlan is detected. */
 			uint64_t vlan_flags;
 		};
+		struct { /* i40e specific values */
+			uint8_t hs_mode; /**< Header Split mode */
+			uint8_t dcb_tc; /**< Traffic class of rx queue */
+		};
 	};
 };
 
diff --git a/drivers/net/intel/i40e/i40e_ethdev.c b/drivers/net/intel/i40e/i40e_ethdev.c
index 90eba3419f..e0a865845b 100644
--- a/drivers/net/intel/i40e/i40e_ethdev.c
+++ b/drivers/net/intel/i40e/i40e_ethdev.c
@@ -6609,7 +6609,7 @@ i40e_dev_rx_init(struct i40e_pf *pf)
 	struct rte_eth_dev_data *data = pf->dev_data;
 	int ret = I40E_SUCCESS;
 	uint16_t i;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	i40e_pf_config_rss(pf);
 	for (i = 0; i < data->nb_rx_queues; i++) {
@@ -8974,7 +8974,7 @@ i40e_pf_calc_configured_queues_num(struct i40e_pf *pf)
 {
 	struct rte_eth_dev_data *data = pf->dev_data;
 	int i, num;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	num = 0;
 	for (i = 0; i < pf->lan_nb_qps; i++) {
diff --git a/drivers/net/intel/i40e/i40e_ethdev.h b/drivers/net/intel/i40e/i40e_ethdev.h
index ccc8732d7d..44864292d0 100644
--- a/drivers/net/intel/i40e/i40e_ethdev.h
+++ b/drivers/net/intel/i40e/i40e_ethdev.h
@@ -333,7 +333,7 @@ struct i40e_vsi_list {
 	struct i40e_vsi *vsi;
 };
 
-struct i40e_rx_queue;
+struct ci_rx_queue;
 struct ci_tx_queue;
 
 /* Bandwidth limit information */
@@ -739,7 +739,7 @@ struct i40e_fdir_info {
 	struct i40e_vsi *fdir_vsi;     /* pointer to fdir VSI structure */
 	uint16_t match_counter_index;  /* Statistic counter index used for fdir*/
 	struct ci_tx_queue *txq;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	void *prg_pkt[I40E_FDIR_PRG_PKT_CNT];     /* memory for fdir program packet */
 	uint64_t dma_addr[I40E_FDIR_PRG_PKT_CNT]; /* physic address of packet memory*/
 	/*
diff --git a/drivers/net/intel/i40e/i40e_fdir.c b/drivers/net/intel/i40e/i40e_fdir.c
index 94e3ab44e3..eadcf63d1d 100644
--- a/drivers/net/intel/i40e/i40e_fdir.c
+++ b/drivers/net/intel/i40e/i40e_fdir.c
@@ -100,9 +100,9 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 				  bool add, bool wait_status);
 
 static int
-i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
+i40e_fdir_rx_queue_init(struct ci_rx_queue *rxq)
 {
-	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
+	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->i40e_vsi);
 	struct i40e_hmc_obj_rxq rx_ctx;
 	int err = I40E_SUCCESS;
 
@@ -139,7 +139,7 @@ i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
 		return err;
 	}
 	rxq->qrx_tail = hw->hw_addr +
-		I40E_QRX_TAIL(rxq->vsi->base_queue);
+		I40E_QRX_TAIL(rxq->i40e_vsi->base_queue);
 
 	rte_wmb();
 	/* Init the RX tail register. */
@@ -382,7 +382,7 @@ i40e_fdir_rx_proc_enable(struct rte_eth_dev *dev, bool on)
 	int32_t i;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		if (!rxq)
 			continue;
 		rxq->fdir_enabled = on;
@@ -929,7 +929,7 @@ i40e_build_ctob(uint32_t td_cmd,
  * tx queue
  */
 static inline int
-i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
+i40e_check_fdir_programming_status(struct ci_rx_queue *rxq)
 {
 	volatile union i40e_rx_desc *rxdp;
 	uint64_t qword1;
@@ -938,7 +938,7 @@ i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
 	uint32_t error;
 	int ret = 0;
 
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
 	rx_status = (qword1 & I40E_RXD_QW1_STATUS_MASK)
 			>> I40E_RXD_QW1_STATUS_SHIFT;
@@ -987,7 +987,7 @@ i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
 }
 
 static inline void
-i40e_fdir_programming_status_cleanup(struct i40e_rx_queue *rxq)
+i40e_fdir_programming_status_cleanup(struct ci_rx_queue *rxq)
 {
 	uint16_t retry_count = 0;
 
@@ -1627,7 +1627,7 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 				  bool add, bool wait_status)
 {
 	struct ci_tx_queue *txq = pf->fdir.txq;
-	struct i40e_rx_queue *rxq = pf->fdir.rxq;
+	struct ci_rx_queue *rxq = pf->fdir.rxq;
 	const struct i40e_fdir_action *fdir_action = &filter->action;
 	volatile struct i40e_tx_desc *txdp;
 	volatile struct i40e_filter_program_desc *fdirdp;
diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
index 2875c578af..aa7703216d 100644
--- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
@@ -13,15 +13,15 @@
 void
 i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
-	struct i40e_rx_entry *rxep;
+	struct ci_rx_queue *rxq = rx_queue;
+	struct ci_rx_entry *rxep;
 	volatile union i40e_rx_desc *rxdp;
 	uint16_t rx_id;
 	uint64_t paddr;
 	uint64_t dma_addr;
 	uint16_t i;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 	rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
 	for (i = 0; i < nb_mbufs; i++) {
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index c3ff2e05c3..96490296ba 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -94,12 +94,12 @@ i40e_monitor_callback(const uint64_t value,
 int
 i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile union i40e_rx_desc *rxdp;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = I40E_RX_RING_PTR(rxq, desc);
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.qword1.status_error_len;
 
@@ -416,9 +416,9 @@ i40e_xmit_cleanup(struct ci_tx_queue *txq)
 
 static inline int
 #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
-check_rx_burst_bulk_alloc_preconditions(struct i40e_rx_queue *rxq)
+check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
 #else
-check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq)
+check_rx_burst_bulk_alloc_preconditions(__rte_unused struct ci_rx_queue *rxq)
 #endif
 {
 	int ret = 0;
@@ -456,10 +456,10 @@ check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq)
 #error "PMD I40E: I40E_LOOK_AHEAD must be 8\n"
 #endif
 static inline int
-i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
+i40e_rx_scan_hw_ring(struct ci_rx_queue *rxq)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t pkt_len;
 	uint64_t qword1;
@@ -467,9 +467,9 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
 	int32_t s[I40E_LOOK_AHEAD], var, nb_dd;
 	int32_t i, j, nb_rx = 0;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
@@ -558,7 +558,7 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
 }
 
 static inline uint16_t
-i40e_rx_fill_from_stage(struct i40e_rx_queue *rxq,
+i40e_rx_fill_from_stage(struct ci_rx_queue *rxq,
 			struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
@@ -577,10 +577,10 @@ i40e_rx_fill_from_stage(struct i40e_rx_queue *rxq,
 }
 
 static inline int
-i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
+i40e_rx_alloc_bufs(struct ci_rx_queue *rxq)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx, i;
 	uint64_t dma_addr;
@@ -597,7 +597,7 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
 		return -ENOMEM;
 	}
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = I40E_RX_RING_PTR(rxq, alloc_idx);
 	for (i = 0; i < rxq->rx_free_thresh; i++) {
 		if (likely(i < (rxq->rx_free_thresh - 1)))
 			/* Prefetch next mbuf */
@@ -629,7 +629,7 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
 static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = (struct i40e_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	struct rte_eth_dev *dev;
 	uint16_t nb_rx = 0;
 
@@ -648,7 +648,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		if (i40e_rx_alloc_bufs(rxq) != 0) {
 			uint16_t i, j;
 
-			dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+			dev = I40E_VSI_TO_ETH_DEV(rxq->i40e_vsi);
 			dev->data->rx_mbuf_alloc_failed +=
 				rxq->rx_free_thresh;
 
@@ -707,12 +707,12 @@ i40e_recv_pkts_bulk_alloc(void __rte_unused *rx_queue,
 uint16_t
 i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	volatile union i40e_rx_desc *rx_ring;
 	volatile union i40e_rx_desc *rxdp;
 	union i40e_rx_desc rxd;
-	struct i40e_rx_entry *sw_ring;
-	struct i40e_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_eth_dev *dev;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
@@ -729,9 +729,9 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
+	rx_ring = I40E_RX_RING(rxq);
 	sw_ring = rxq->sw_ring;
-	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -745,7 +745,7 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+			dev = I40E_VSI_TO_ETH_DEV(rxq->i40e_vsi);
 			dev->data->rx_mbuf_alloc_failed++;
 			break;
 		}
@@ -837,12 +837,12 @@ i40e_recv_scattered_pkts(void *rx_queue,
 			 struct rte_mbuf **rx_pkts,
 			 uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
-	volatile union i40e_rx_desc *rx_ring = rxq->rx_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union i40e_rx_desc *rx_ring = I40E_RX_RING(rxq);
 	volatile union i40e_rx_desc *rxdp;
 	union i40e_rx_desc rxd;
-	struct i40e_rx_entry *sw_ring = rxq->sw_ring;
-	struct i40e_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb, *rxm;
@@ -853,7 +853,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
 	uint64_t qword1;
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -867,7 +867,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
 
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+			dev = I40E_VSI_TO_ETH_DEV(rxq->i40e_vsi);
 			dev->data->rx_mbuf_alloc_failed++;
 			break;
 		}
@@ -1798,7 +1798,7 @@ i40e_get_queue_offset_by_qindex(struct i40e_pf *pf, uint16_t queue_idx)
 int
 i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -1841,7 +1841,7 @@ i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 int
 i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -2004,7 +2004,7 @@ i40e_dev_first_queue(uint16_t idx, void **queues, int num)
 
 static int
 i40e_dev_rx_queue_setup_runtime(struct rte_eth_dev *dev,
-				struct i40e_rx_queue *rxq)
+				struct ci_rx_queue *rxq)
 {
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
@@ -2081,7 +2081,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct i40e_vsi *vsi;
 	struct i40e_pf *pf = NULL;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz;
 	uint32_t ring_size;
 	uint16_t len, i;
@@ -2116,7 +2116,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate the rx queue data structure */
 	rxq = rte_zmalloc_socket("i40e rx queue",
-				 sizeof(struct i40e_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 socket_id);
 	if (!rxq) {
@@ -2135,7 +2135,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	else
 		rxq->crc_len = 0;
 	rxq->drop_en = rx_conf->rx_drop_en;
-	rxq->vsi = vsi;
+	rxq->i40e_vsi = vsi;
 	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 	rxq->offloads = offloads;
 
@@ -2164,14 +2164,14 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	memset(rz->addr, 0, ring_size);
 
 	rxq->rx_ring_phys_addr = rz->iova;
-	rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
+	I40E_RX_RING(rxq) = (union i40e_rx_desc *)rz->addr;
 
 	len = (uint16_t)(nb_desc + RTE_PMD_I40E_RX_MAX_BURST);
 
 	/* Allocate the software ring. */
 	rxq->sw_ring =
 		rte_zmalloc_socket("i40e rx sw ring",
-				   sizeof(struct i40e_rx_entry) * len,
+				   sizeof(struct ci_rx_entry) * len,
 				   RTE_CACHE_LINE_SIZE,
 				   socket_id);
 	if (!rxq->sw_ring) {
@@ -2242,7 +2242,7 @@ i40e_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 void
 i40e_rx_queue_release(void *rxq)
 {
-	struct i40e_rx_queue *q = (struct i40e_rx_queue *)rxq;
+	struct ci_rx_queue *q = (struct ci_rx_queue *)rxq;
 
 	if (!q) {
 		PMD_DRV_LOG(DEBUG, "Pointer to rxq is NULL");
@@ -2260,11 +2260,11 @@ i40e_dev_rx_queue_count(void *rx_queue)
 {
 #define I40E_RXQ_SCAN_INTERVAL 4
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &(rxq->rx_ring[rxq->rx_tail]);
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	while ((desc < rxq->nb_rx_desc) &&
 		((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
 		I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT) &
@@ -2277,8 +2277,8 @@ i40e_dev_rx_queue_count(void *rx_queue)
 		desc += I40E_RXQ_SCAN_INTERVAL;
 		rxdp += I40E_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
-					desc - rxq->nb_rx_desc]);
+			rxdp = I40E_RX_RING_PTR(rxq,
+					rxq->rx_tail + desc - rxq->nb_rx_desc);
 	}
 
 	return desc;
@@ -2287,7 +2287,7 @@ i40e_dev_rx_queue_count(void *rx_queue)
 int
 i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile uint64_t *status;
 	uint64_t mask;
 	uint32_t desc;
@@ -2302,7 +2302,7 @@ i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	status = &rxq->rx_ring[desc].wb.qword1.status_error_len;
+	status = &I40E_RX_RING_PTR(rxq, desc)->wb.qword1.status_error_len;
 	mask = rte_le_to_cpu_64((1ULL << I40E_RX_DESC_STATUS_DD_SHIFT)
 		<< I40E_RXD_QW1_STATUS_SHIFT);
 	if (*status & mask)
@@ -2628,12 +2628,12 @@ i40e_memzone_reserve(const char *name, uint32_t len, int socket_id)
 }
 
 void
-i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs(struct ci_rx_queue *rxq)
 {
 	uint16_t i;
 
 	/* SSE Vector driver has a different way of releasing mbufs. */
-	if (rxq->rx_using_sse) {
+	if (rxq->vector_rx) {
 		i40e_rx_queue_release_mbufs_vec(rxq);
 		return;
 	}
@@ -2663,7 +2663,7 @@ i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
 }
 
 void
-i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
+i40e_reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	unsigned i;
 	uint16_t len;
@@ -2681,7 +2681,7 @@ i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
 		len = rxq->nb_rx_desc;
 
 	for (i = 0; i < len * sizeof(union i40e_rx_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)I40E_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 	for (i = 0; i < RTE_PMD_I40E_RX_MAX_BURST; ++i)
@@ -2898,9 +2898,9 @@ i40e_tx_queue_init(struct ci_tx_queue *txq)
 }
 
 int
-i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
+i40e_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq)
 {
-	struct i40e_rx_entry *rxe = rxq->sw_ring;
+	struct ci_rx_entry *rxe = rxq->sw_ring;
 	uint64_t dma_addr;
 	uint16_t i;
 
@@ -2922,7 +2922,7 @@ i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
-		rxd = &rxq->rx_ring[i];
+		rxd = I40E_RX_RING_PTR(rxq, i);
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
@@ -2941,10 +2941,10 @@ i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
  * and maximum packet length.
  */
 static int
-i40e_rx_queue_config(struct i40e_rx_queue *rxq)
+i40e_rx_queue_config(struct ci_rx_queue *rxq)
 {
-	struct i40e_pf *pf = I40E_VSI_TO_PF(rxq->vsi);
-	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
+	struct i40e_pf *pf = I40E_VSI_TO_PF(rxq->i40e_vsi);
+	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->i40e_vsi);
 	struct rte_eth_dev_data *data = pf->dev_data;
 	uint16_t buf_size;
 
@@ -2988,11 +2988,11 @@ i40e_rx_queue_config(struct i40e_rx_queue *rxq)
 
 /* Init the RX queue in hardware */
 int
-i40e_rx_queue_init(struct i40e_rx_queue *rxq)
+i40e_rx_queue_init(struct ci_rx_queue *rxq)
 {
 	int err = I40E_SUCCESS;
-	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
-	struct rte_eth_dev_data *dev_data = I40E_VSI_TO_DEV_DATA(rxq->vsi);
+	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->i40e_vsi);
+	struct rte_eth_dev_data *dev_data = I40E_VSI_TO_DEV_DATA(rxq->i40e_vsi);
 	uint16_t pf_q = rxq->reg_idx;
 	uint16_t buf_size;
 	struct i40e_hmc_obj_rxq rx_ctx;
@@ -3166,7 +3166,7 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
 enum i40e_status_code
 i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz = NULL;
 	uint32_t ring_size;
 	struct rte_eth_dev *dev;
@@ -3180,7 +3180,7 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 
 	/* Allocate the RX queue data structure. */
 	rxq = rte_zmalloc_socket("i40e fdir rx queue",
-				  sizeof(struct i40e_rx_queue),
+				  sizeof(struct ci_rx_queue),
 				  RTE_CACHE_LINE_SIZE,
 				  SOCKET_ID_ANY);
 	if (!rxq) {
@@ -3206,11 +3206,11 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 	rxq->nb_rx_desc = I40E_FDIR_NUM_RX_DESC;
 	rxq->queue_id = I40E_FDIR_QUEUE_ID;
 	rxq->reg_idx = pf->fdir.fdir_vsi->base_queue;
-	rxq->vsi = pf->fdir.fdir_vsi;
+	rxq->i40e_vsi = pf->fdir.fdir_vsi;
 
 	rxq->rx_ring_phys_addr = rz->iova;
 	memset(rz->addr, 0, I40E_FDIR_NUM_RX_DESC * sizeof(union i40e_rx_desc));
-	rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
+	I40E_RX_RING(rxq) = (union i40e_rx_desc *)rz->addr;
 
 	/*
 	 * Don't need to allocate software ring and reset for the fdir
@@ -3226,7 +3226,7 @@ void
 i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -3264,7 +3264,7 @@ void
 i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 
@@ -3335,7 +3335,7 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 		}
 		if (ad->rx_vec_allowed) {
 			for (i = 0; i < dev->data->nb_rx_queues; i++) {
-				struct i40e_rx_queue *rxq =
+				struct ci_rx_queue *rxq =
 					dev->data->rx_queues[i];
 
 				if (rxq && i40e_rxq_vec_setup(rxq)) {
@@ -3438,10 +3438,10 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 			 dev->rx_pkt_burst == i40e_recv_pkts_vec_avx2);
 
 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
-			struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+			struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 
 			if (rxq)
-				rxq->rx_using_sse = rx_using_sse;
+				rxq->vector_rx = rx_using_sse;
 		}
 	}
 }
diff --git a/drivers/net/intel/i40e/i40e_rxtx.h b/drivers/net/intel/i40e/i40e_rxtx.h
index 2f32fc5686..4b5a84d8ef 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx.h
@@ -6,8 +6,9 @@
 #define _I40E_RXTX_H_
 
 #include "../common/tx.h"
+#include "../common/rx.h"
 
-#define RTE_PMD_I40E_RX_MAX_BURST 32
+#define RTE_PMD_I40E_RX_MAX_BURST CI_RX_MAX_BURST
 #define RTE_PMD_I40E_TX_MAX_BURST 32
 
 #define RTE_I40E_VPMD_RX_BURST        32
@@ -67,62 +68,19 @@ enum i40e_header_split_mode {
 			       I40E_HEADER_SPLIT_UDP_TCP | \
 			       I40E_HEADER_SPLIT_SCTP)
 
-/* HW desc structure, both 16-byte and 32-byte types are supported */
+/* HW desc structures, both 16-byte and 32-byte types are supported */
 #ifdef RTE_LIBRTE_I40E_16BYTE_RX_DESC
 #define i40e_rx_desc i40e_16byte_rx_desc
+#define I40E_RX_RING(rxq) \
+	((rxq)->i40e_rx_16b_ring)
 #else
 #define i40e_rx_desc i40e_32byte_rx_desc
+#define I40E_RX_RING(rxq) \
+	((rxq)->i40e_rx_32b_ring)
 #endif
 
-struct i40e_rx_entry {
-	struct rte_mbuf *mbuf;
-};
-
-/*
- * Structure associated with each RX queue.
- */
-struct i40e_rx_queue {
-	struct rte_mempool *mp; /**< mbuf pool to populate RX ring */
-	volatile union i40e_rx_desc *rx_ring;/**< RX ring virtual address */
-	uint64_t rx_ring_phys_addr; /**< RX ring DMA address */
-	struct i40e_rx_entry *sw_ring; /**< address of RX soft ring */
-	uint16_t nb_rx_desc; /**< number of RX descriptors */
-	uint16_t rx_free_thresh; /**< max free RX desc to hold */
-	uint16_t rx_tail; /**< current value of tail */
-	uint16_t nb_rx_hold; /**< number of held free RX desc */
-	struct rte_mbuf *pkt_first_seg; /**< first segment of current packet */
-	struct rte_mbuf *pkt_last_seg; /**< last segment of current packet */
-	struct rte_mbuf fake_mbuf; /**< dummy mbuf */
-#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
-	uint16_t rx_nb_avail; /**< number of staged packets ready */
-	uint16_t rx_next_avail; /**< index of next staged packets */
-	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	struct rte_mbuf *rx_stage[RTE_PMD_I40E_RX_MAX_BURST * 2];
-#endif
-
-	uint16_t rxrearm_nb;	/**< number of remaining to be re-armed */
-	uint16_t rxrearm_start;	/**< the idx we start the re-arming from */
-	uint64_t mbuf_initializer; /**< value to init mbufs */
-
-	uint16_t port_id; /**< device port ID */
-	uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise */
-	uint8_t fdir_enabled; /**< 0 if FDIR disabled, 1 when enabled */
-	uint16_t queue_id; /**< RX queue index */
-	uint16_t reg_idx; /**< RX queue register index */
-	uint8_t drop_en; /**< if not 0, set register bit */
-	volatile uint8_t *qrx_tail; /**< register address of tail */
-	struct i40e_vsi *vsi; /**< the VSI this queue belongs to */
-	uint16_t rx_buf_len; /* The packet buffer size */
-	uint16_t rx_hdr_len; /* The header buffer size */
-	uint16_t max_pkt_len; /* Maximum packet length */
-	uint8_t hs_mode; /* Header Split mode */
-	bool q_set; /**< indicate if rx queue has been configured */
-	bool rx_deferred_start; /**< don't start this queue in dev start */
-	uint16_t rx_using_sse; /**<flag indicate the usage of vPMD for rx */
-	uint8_t dcb_tc;         /**< Traffic class of rx queue */
-	uint64_t offloads; /**< Rx offload flags of RTE_ETH_RX_OFFLOAD_* */
-	const struct rte_memzone *mz;
-};
+#define I40E_RX_RING_PTR(rxq, entry) \
+	(I40E_RX_RING(rxq) + (entry))
 
 /** Offload features */
 union i40e_tx_offload {
@@ -172,16 +130,16 @@ uint16_t i40e_simple_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t i40e_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
 int i40e_tx_queue_init(struct ci_tx_queue *txq);
-int i40e_rx_queue_init(struct i40e_rx_queue *rxq);
+int i40e_rx_queue_init(struct ci_rx_queue *rxq);
 void i40e_free_tx_resources(struct ci_tx_queue *txq);
-void i40e_free_rx_resources(struct i40e_rx_queue *rxq);
+void i40e_free_rx_resources(struct ci_rx_queue *rxq);
 void i40e_dev_clear_queues(struct rte_eth_dev *dev);
 void i40e_dev_free_queues(struct rte_eth_dev *dev);
-void i40e_reset_rx_queue(struct i40e_rx_queue *rxq);
+void i40e_reset_rx_queue(struct ci_rx_queue *rxq);
 void i40e_reset_tx_queue(struct ci_tx_queue *txq);
 int i40e_tx_done_cleanup(void *txq, uint32_t free_cnt);
-int i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq);
-void i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq);
+int i40e_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq);
+void i40e_rx_queue_release_mbufs(struct ci_rx_queue *rxq);
 
 uint32_t i40e_dev_rx_queue_count(void *rx_queue);
 int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset);
@@ -197,9 +155,9 @@ uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue,
 				      struct rte_mbuf **rx_pkts,
 				      uint16_t nb_pkts);
 int i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);
-int i40e_rxq_vec_setup(struct i40e_rx_queue *rxq);
+int i40e_rxq_vec_setup(struct ci_rx_queue *rxq);
 int i40e_txq_vec_setup(struct ci_tx_queue *txq);
-void i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq);
+void i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq);
 uint16_t i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				   uint16_t nb_pkts);
 void i40e_set_rx_function(struct rte_eth_dev *dev);
diff --git a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
index b66a808f9f..fd9447014b 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
@@ -13,14 +13,14 @@
 
 #ifdef __AVX2__
 static __rte_always_inline void
-i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
+i40e_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
index 42beff6e89..3e4109e82e 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
@@ -16,13 +16,13 @@
 #include <rte_altivec.h>
 
 static inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
 
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 
 	__vector unsigned long hdr_room = (__vector unsigned long){
@@ -30,7 +30,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 						RTE_PKTMBUF_HEADROOM};
 	__vector unsigned long dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING(rxq) + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -195,16 +195,16 @@ desc_to_ptype_v(__vector unsigned long descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
 	__vector unsigned char shuf_msk;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	__vector unsigned short crc_adjust = (__vector unsigned short){
 		0, 0,         /* ignore pkt_type field */
@@ -221,7 +221,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -465,7 +465,7 @@ static uint16_t
 i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -611,15 +611,15 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+i40e_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
-	rxq->rx_using_sse = 1;
+	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
 }
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index 9c406e7a6f..0f3f7430aa 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -16,7 +16,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	i40e_rxq_rearm_common(rxq, false);
 }
@@ -105,16 +105,16 @@ desc_fdir_processing_32b(volatile union i40e_rx_desc *rxdp,
 
 /* Force inline as some compilers will not inline by default. */
 static __rte_always_inline uint16_t
-_recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec_avx2(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts, uint8_t *split_packet)
 {
 #define RTE_I40E_DESCS_PER_LOOP_AVX 8
 
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct i40e_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union i40e_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union i40e_rx_desc *rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 	rte_prefetch0(rxdp);
 
@@ -625,7 +625,7 @@ static uint16_t
 i40e_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index d8244556c0..f2292b45e8 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -18,7 +18,7 @@
 #define RTE_I40E_DESCS_PER_LOOP_AVX 8
 
 static __rte_always_inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	i40e_rxq_rearm_common(rxq, true);
 }
@@ -108,14 +108,14 @@ desc_fdir_processing_32b(volatile union i40e_rx_desc *rxdp,
 
 /* Force inline as some compilers will not inline by default. */
 static __rte_always_inline uint16_t
-_recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec_avx512(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			  uint16_t nb_pkts, uint8_t *split_packet)
 {
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct i40e_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union i40e_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union i40e_rx_desc *rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -693,7 +693,7 @@ i40e_recv_scattered_burst_vec_avx512(void *rx_queue,
 				     struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_common.h b/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
index ba72df8e13..d19b9e4bf4 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
@@ -21,7 +21,7 @@ i40e_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
 }
 
 static inline void
-_i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+_i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	const unsigned mask = rxq->nb_rx_desc - 1;
 	unsigned i;
@@ -68,7 +68,7 @@ i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
 	 */
 	ad->rx_vec_allowed = true;
 	for (uint16_t i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		if (!rxq)
 			continue;
 		if (!ci_rxq_vec_capable(rxq->nb_rx_desc, rxq->rx_free_thresh, rxq->offloads)) {
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index d16ceb6b5d..814aa666dc 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -17,18 +17,18 @@
 #include "i40e_rxtx_vec_common.h"
 
 static inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	uint64x2_t dma_addr0, dma_addr1;
 	uint64x2_t zero = vdupq_n_u64(0);
 	uint64_t paddr;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (unlikely(rte_mempool_get_bulk(rxq->mp,
@@ -203,7 +203,7 @@ descs_to_fdir_16b(uint32x4_t fltstat, uint64x2_t descs[4], struct rte_mbuf **rx_
 #endif
 
 static inline void
-desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
+desc_to_olflags_v(struct ci_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
 		  uint64x2_t descs[4], struct rte_mbuf **rx_pkts)
 {
 	uint32x4_t vlan0, vlan1, rss, l3_l4e;
@@ -332,15 +332,15 @@ desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **__rte_restrict rx_pkts,
  * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
+_recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		   struct rte_mbuf **__rte_restrict rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	/* mask to shuffle from desc. to mbuf */
 	uint8x16_t shuf_msk = {
@@ -374,7 +374,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch_non_temporal(rxdp);
 
@@ -592,7 +592,7 @@ i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
 
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -738,15 +738,15 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
 }
 
 void __rte_cold
-i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+i40e_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
-	rxq->rx_using_sse = 1;
+	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
 }
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 774519265b..74cd59e245 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -15,18 +15,18 @@
 #include <rte_vect.h>
 
 static inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
 	__m128i dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -207,7 +207,7 @@ descs_to_fdir_16b(__m128i fltstat, __m128i descs[4], struct rte_mbuf **rx_pkt)
 #endif
 
 static inline void
-desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
+desc_to_olflags_v(struct ci_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
 		  __m128i descs[4], struct rte_mbuf **rx_pkts)
 {
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -347,16 +347,16 @@ desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
 	__m128i shuf_msk;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	__m128i crc_adjust = _mm_set_epi16(
 				0, 0, 0,    /* ignore non-length fields */
@@ -382,7 +382,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -609,7 +609,7 @@ i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
 
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -755,15 +755,15 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+i40e_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
-	rxq->rx_using_sse = 1;
+	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
 }
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v2 05/13] net/ice: use the common Rx queue structure
  2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (2 preceding siblings ...)
  2025-05-12 10:58   ` [PATCH v2 04/13] net/i40e: use the " Anatoly Burakov
@ 2025-05-12 10:58   ` Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 06/13] net/iavf: " Anatoly Burakov
                     ` (7 subsequent siblings)
  11 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 10:58 UTC (permalink / raw)
  To: dev, Bruce Richardson

Make the ice driver use the new common Rx queue structure.

Because the ice driver supports both 16-byte and 32-byte descriptor
formats (controlled by the RTE_LIBRTE_ICE_16BYTE_RX_DESC define), the
common queue structure has to account for that, so it carries ring
pointers for both formats, while the actual format is picked by ice at
compile time using the above macro. Direct accesses to the Rx ring are
therefore replaced with macro-based accesses that take the descriptor
size into account.
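
The resulting access pattern mirrors the i40e conversion; a minimal
sketch, assuming the ice macros are defined analogously to the i40e
ones (the exact definitions live in the ice_rxtx.h hunk of this patch):

	/* Pick the ring member matching the compile-time descriptor format. */
	#ifdef RTE_LIBRTE_ICE_16BYTE_RX_DESC
	#define ICE_RX_RING(rxq)	((rxq)->ice_rx_16b_ring)
	#else
	#define ICE_RX_RING(rxq)	((rxq)->ice_rx_32b_ring)
	#endif
	#define ICE_RX_RING_PTR(rxq, entry)	(ICE_RX_RING(rxq) + (entry))

	/* Callers index the ring via the macro instead of rxq->rx_ring: */
	rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);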

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v2:
    - Make xtr_field_offs of type ptrdiff_t instead of off_t to fix 32-bit compile
      issues

 drivers/net/intel/common/rx.h               |  23 +++
 drivers/net/intel/ice/ice_dcf.c             |   6 +-
 drivers/net/intel/ice/ice_dcf_ethdev.c      |  22 +--
 drivers/net/intel/ice/ice_ethdev.c          |   2 +-
 drivers/net/intel/ice/ice_ethdev.h          |   5 +-
 drivers/net/intel/ice/ice_rxtx.c            | 158 ++++++++++----------
 drivers/net/intel/ice/ice_rxtx.h            |  78 ++--------
 drivers/net/intel/ice/ice_rxtx_common_avx.h |   6 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx2.c   |  14 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx512.c |  16 +-
 drivers/net/intel/ice/ice_rxtx_vec_common.h |   6 +-
 drivers/net/intel/ice/ice_rxtx_vec_sse.c    |  22 +--
 12 files changed, 164 insertions(+), 194 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index db49db57d0..9a691971bc 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -5,6 +5,7 @@
 #ifndef _COMMON_INTEL_RX_H_
 #define _COMMON_INTEL_RX_H_
 
+#include <stddef.h>
 #include <stdint.h>
 #include <unistd.h>
 #include <rte_mbuf.h>
@@ -12,6 +13,7 @@
 
 #define CI_RX_BURST 32
 #define CI_RX_MAX_BURST 32
+#define CI_RX_MAX_NSEG 2
 
 struct ci_rx_queue;
 
@@ -23,6 +25,8 @@ struct ci_rx_entry_sc {
 	struct rte_mbuf *fbuf; /* First segment of the fragmented packet.*/
 };
 
+typedef void (*ci_rx_release_mbufs_t)(struct ci_rx_queue *rxq);
+
 /**
  * Structure associated with each RX queue.
  */
@@ -32,6 +36,8 @@ struct ci_rx_queue {
 		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
 		volatile union i40e_16byte_rx_desc *i40e_rx_16b_ring;
 		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
+		volatile union ice_16b_rx_flex_desc *ice_rx_16b_ring;
+		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
@@ -64,10 +70,16 @@ struct ci_rx_queue {
 	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
 	uint64_t mbuf_initializer; /**< value to init mbufs */
 	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
+	uint32_t rxdid; /**< RX descriptor format ID. */
+	uint32_t proto_xtr; /* protocol extraction type */
+	uint64_t xtr_ol_flag; /* flexible descriptor metadata extraction offload flag */
+	ptrdiff_t xtr_field_offs; /* Protocol extraction metadata offset */
+	uint64_t hw_time_update; /**< Last time HW timestamp was updated */
 	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
 	struct rte_mbuf fake_mbuf;
 	union { /* the VSI this queue belongs to */
 		struct i40e_vsi *i40e_vsi;
+		struct ice_vsi *ice_vsi;
 	};
 	const struct rte_memzone *mz;
 	union {
@@ -85,6 +97,17 @@ struct ci_rx_queue {
 			uint8_t hs_mode; /**< Header Split mode */
 			uint8_t dcb_tc; /**< Traffic class of rx queue */
 		};
+		struct { /* ice specific values */
+			ci_rx_release_mbufs_t rx_rel_mbufs; /**< release mbuf function */
+			/** holds buffer split information */
+			struct rte_eth_rxseg_split rxseg[CI_RX_MAX_NSEG];
+			struct ci_rx_entry *sw_split_buf; /**< Buffer split SW ring */
+			uint32_t rxseg_nb; /**< number of buffer split segments */
+			uint32_t time_high; /* high 32 bits of hardware timestamp register */
+			uint32_t hw_time_high; /* high 32 bits of timestamp */
+			uint32_t hw_time_low; /* low 32 bits of timestamp */
+			bool ts_enable; /* if rxq timestamp is enabled */
+		};
 	};
 };
 
diff --git a/drivers/net/intel/ice/ice_dcf.c b/drivers/net/intel/ice/ice_dcf.c
index 65c18921f4..fddf5bbde5 100644
--- a/drivers/net/intel/ice/ice_dcf.c
+++ b/drivers/net/intel/ice/ice_dcf.c
@@ -1175,8 +1175,8 @@ ice_dcf_init_rss(struct ice_dcf_hw *hw)
 int
 ice_dcf_configure_queues(struct ice_dcf_hw *hw)
 {
-	struct ice_rx_queue **rxq =
-		(struct ice_rx_queue **)hw->eth_dev->data->rx_queues;
+	struct ci_rx_queue **rxq =
+		(struct ci_rx_queue **)hw->eth_dev->data->rx_queues;
 	struct ci_tx_queue **txq =
 		(struct ci_tx_queue **)hw->eth_dev->data->tx_queues;
 	struct virtchnl_vsi_queue_config_info *vc_config;
@@ -1211,7 +1211,7 @@ ice_dcf_configure_queues(struct ice_dcf_hw *hw)
 
 		vc_qp->rxq.max_pkt_size = rxq[i]->max_pkt_len;
 		vc_qp->rxq.ring_len = rxq[i]->nb_rx_desc;
-		vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_dma;
+		vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_phys_addr;
 		vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len;
 
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
diff --git a/drivers/net/intel/ice/ice_dcf_ethdev.c b/drivers/net/intel/ice/ice_dcf_ethdev.c
index efff76afa8..53272ddd60 100644
--- a/drivers/net/intel/ice/ice_dcf_ethdev.c
+++ b/drivers/net/intel/ice/ice_dcf_ethdev.c
@@ -106,7 +106,7 @@ ice_dcf_xmit_pkts(__rte_unused void *tx_queue,
 }
 
 static int
-ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ice_rx_queue *rxq)
+ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ci_rx_queue *rxq)
 {
 	struct ice_dcf_adapter *dcf_ad = dev->data->dev_private;
 	struct rte_eth_dev_data *dev_data = dev->data;
@@ -145,8 +145,8 @@ ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ice_rx_queue *rxq)
 static int
 ice_dcf_init_rx_queues(struct rte_eth_dev *dev)
 {
-	struct ice_rx_queue **rxq =
-		(struct ice_rx_queue **)dev->data->rx_queues;
+	struct ci_rx_queue **rxq =
+		(struct ci_rx_queue **)dev->data->rx_queues;
 	int i, ret;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -282,7 +282,7 @@ ice_dcf_config_rx_queues_irqs(struct rte_eth_dev *dev,
 }
 
 static int
-alloc_rxq_mbufs(struct ice_rx_queue *rxq)
+alloc_rxq_mbufs(struct ci_rx_queue *rxq)
 {
 	volatile union ice_rx_flex_desc *rxd;
 	struct rte_mbuf *mbuf = NULL;
@@ -305,7 +305,7 @@ alloc_rxq_mbufs(struct ice_rx_queue *rxq)
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
-		rxd = &rxq->rx_ring[i];
+		rxd = ICE_RX_RING_PTR(rxq, i);
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
@@ -324,7 +324,7 @@ ice_dcf_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ice_dcf_adapter *ad = dev->data->dev_private;
 	struct iavf_hw *hw = &ad->real_hw.avf;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err = 0;
 
 	if (rx_queue_id >= dev->data->nb_rx_queues)
@@ -358,7 +358,7 @@ ice_dcf_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 }
 
 static inline void
-reset_rx_queue(struct ice_rx_queue *rxq)
+reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	uint16_t len;
 	uint32_t i;
@@ -369,7 +369,7 @@ reset_rx_queue(struct ice_rx_queue *rxq)
 	len = rxq->nb_rx_desc + ICE_RX_MAX_BURST;
 
 	for (i = 0; i < len * sizeof(union ice_rx_flex_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)ICE_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 
@@ -429,7 +429,7 @@ ice_dcf_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ice_dcf_adapter *ad = dev->data->dev_private;
 	struct ice_dcf_hw *hw = &ad->real_hw;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 
 	if (rx_queue_id >= dev->data->nb_rx_queues)
@@ -511,7 +511,7 @@ ice_dcf_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 static int
 ice_dcf_start_queues(struct rte_eth_dev *dev)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int nb_rxq = 0;
 	int nb_txq, i;
@@ -638,7 +638,7 @@ ice_dcf_stop_queues(struct rte_eth_dev *dev)
 {
 	struct ice_dcf_adapter *ad = dev->data->dev_private;
 	struct ice_dcf_hw *hw = &ad->real_hw;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int ret, i;
 
diff --git a/drivers/net/intel/ice/ice_ethdev.c b/drivers/net/intel/ice/ice_ethdev.c
index 2e163d706c..65cf586502 100644
--- a/drivers/net/intel/ice/ice_ethdev.c
+++ b/drivers/net/intel/ice/ice_ethdev.c
@@ -6690,7 +6690,7 @@ ice_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct ice_adapter *ad =
 			ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t ts_high;
 	uint64_t ts_ns;
 
diff --git a/drivers/net/intel/ice/ice_ethdev.h b/drivers/net/intel/ice/ice_ethdev.h
index afe8dae497..0ed223d83e 100644
--- a/drivers/net/intel/ice/ice_ethdev.h
+++ b/drivers/net/intel/ice/ice_ethdev.h
@@ -257,9 +257,6 @@ struct ice_vsi_list {
 	struct ice_vsi *vsi;
 };
 
-struct ice_rx_queue;
-struct ci_tx_queue;
-
 /**
  * Structure that defines a VSI, associated with a adapter.
  */
@@ -409,7 +406,7 @@ struct ice_fdir_counter_pool_container {
 struct ice_fdir_info {
 	struct ice_vsi *fdir_vsi;     /* pointer to fdir VSI structure */
 	struct ci_tx_queue *txq;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	void *prg_pkt;                 /* memory for fdir program packet */
 	uint64_t dma_addr;             /* physic address of packet memory*/
 	const struct rte_memzone *mz;
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 40ac01e782..4749ee729f 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -37,11 +37,11 @@ int
 ice_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = ICE_RX_RING_PTR(rxq, desc);
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.status_error0;
 
@@ -73,7 +73,7 @@ ice_proto_xtr_type_to_rxdid(uint8_t xtr_type)
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ci_rx_queue *rxq,
 				       struct rte_mbuf *mb,
 				       volatile union ice_rx_flex_desc *rxdp)
 {
@@ -95,7 +95,7 @@ ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ice_rx_queue *rxq,
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ci_rx_queue *rxq,
 				   struct rte_mbuf *mb,
 				   volatile union ice_rx_flex_desc *rxdp)
 {
@@ -120,7 +120,7 @@ ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ice_rx_queue *rxq,
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ci_rx_queue *rxq,
 				      struct rte_mbuf *mb,
 				      volatile union ice_rx_flex_desc *rxdp)
 {
@@ -164,7 +164,7 @@ ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ice_rx_queue *rxq,
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_aux_v2(struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_aux_v2(struct ci_rx_queue *rxq,
 				      struct rte_mbuf *mb,
 				      volatile union ice_rx_flex_desc *rxdp)
 {
@@ -215,7 +215,7 @@ static const ice_rxd_to_pkt_fields_t rxd_to_pkt_fields_ops[] = {
 };
 
 void
-ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq, uint32_t rxdid)
+ice_select_rxd_to_pkt_fields_handler(struct ci_rx_queue *rxq, uint32_t rxdid)
 {
 	rxq->rxdid = rxdid;
 
@@ -243,17 +243,17 @@ ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq, uint32_t rxdid)
 }
 
 static int
-ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
+ice_program_hw_rx_queue(struct ci_rx_queue *rxq)
 {
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
 	struct ice_pf *pf = ICE_VSI_TO_PF(vsi);
-	struct rte_eth_dev_data *dev_data = rxq->vsi->adapter->pf.dev_data;
+	struct rte_eth_dev_data *dev_data = rxq->ice_vsi->adapter->pf.dev_data;
 	struct ice_rlan_ctx rx_ctx;
 	uint16_t buf_size;
 	uint32_t rxdid = ICE_RXDID_COMMS_OVS;
 	uint32_t regval;
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 	uint32_t frame_size = dev_data->mtu + ICE_ETH_OVERHEAD;
 	int err;
 
@@ -371,7 +371,7 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
 		rx_ctx.dtype = 0; /* No Protocol Based Buffer Split mode */
 	}
 
-	rx_ctx.base = rxq->rx_ring_dma / ICE_QUEUE_BASE_ADDR_UNIT;
+	rx_ctx.base = rxq->rx_ring_phys_addr / ICE_QUEUE_BASE_ADDR_UNIT;
 	rx_ctx.qlen = rxq->nb_rx_desc;
 	rx_ctx.dbuf = rxq->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
 	rx_ctx.hbuf = rxq->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S;
@@ -452,15 +452,15 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
 
 /* Allocate mbufs for all descriptors in rx queue */
 static int
-ice_alloc_rx_queue_mbufs(struct ice_rx_queue *rxq)
+ice_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq)
 {
-	struct ice_rx_entry *rxe = rxq->sw_ring;
+	struct ci_rx_entry *rxe = rxq->sw_ring;
 	uint64_t dma_addr;
 	uint16_t i;
 
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
 		volatile union ice_rx_flex_desc *rxd;
-		rxd = &rxq->rx_ring[i];
+		rxd = ICE_RX_RING_PTR(rxq, i);
 		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mp);
 
 		if (unlikely(!mbuf)) {
@@ -514,7 +514,7 @@ ice_alloc_rx_queue_mbufs(struct ice_rx_queue *rxq)
 
 /* Free all mbufs for descriptors in rx queue */
 static void
-_ice_rx_queue_release_mbufs(struct ice_rx_queue *rxq)
+_ice_rx_queue_release_mbufs(struct ci_rx_queue *rxq)
 {
 	uint16_t i;
 
@@ -591,7 +591,7 @@ ice_switch_rx_queue(struct ice_hw *hw, uint16_t q_idx, bool on)
 }
 
 static inline int
-ice_check_rx_burst_bulk_alloc_preconditions(struct ice_rx_queue *rxq)
+ice_check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
 {
 	int ret = 0;
 
@@ -618,9 +618,9 @@ ice_check_rx_burst_bulk_alloc_preconditions(struct ice_rx_queue *rxq)
 	return ret;
 }
 
-/* reset fields in ice_rx_queue back to default */
+/* reset fields in ci_rx_queue back to default */
 static void
-ice_reset_rx_queue(struct ice_rx_queue *rxq)
+ice_reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	unsigned int i;
 	uint16_t len;
@@ -633,7 +633,7 @@ ice_reset_rx_queue(struct ice_rx_queue *rxq)
 	len = (uint16_t)(rxq->nb_rx_desc + ICE_RX_MAX_BURST);
 
 	for (i = 0; i < len * sizeof(union ice_rx_flex_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)ICE_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 	for (i = 0; i < ICE_RX_MAX_BURST; ++i)
@@ -655,7 +655,7 @@ ice_reset_rx_queue(struct ice_rx_queue *rxq)
 int
 ice_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -715,7 +715,7 @@ ice_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 int
 ice_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -834,9 +834,9 @@ ice_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 }
 
 static int
-ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
+ice_fdir_program_hw_rx_queue(struct ci_rx_queue *rxq)
 {
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
 	uint32_t rxdid = ICE_RXDID_LEGACY_1;
 	struct ice_rlan_ctx rx_ctx;
@@ -848,7 +848,7 @@ ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
 
 	memset(&rx_ctx, 0, sizeof(rx_ctx));
 
-	rx_ctx.base = rxq->rx_ring_dma / ICE_QUEUE_BASE_ADDR_UNIT;
+	rx_ctx.base = rxq->rx_ring_phys_addr / ICE_QUEUE_BASE_ADDR_UNIT;
 	rx_ctx.qlen = rxq->nb_rx_desc;
 	rx_ctx.dbuf = rxq->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
 	rx_ctx.hbuf = rxq->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S;
@@ -909,7 +909,7 @@ ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
 int
 ice_fdir_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
@@ -1099,7 +1099,7 @@ ice_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 int
 ice_fdir_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
@@ -1170,7 +1170,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 	struct ice_adapter *ad =
 		ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct ice_vsi *vsi = pf->main_vsi;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz;
 	uint32_t ring_size, tlen;
 	uint16_t len;
@@ -1206,7 +1206,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate the rx queue data structure */
 	rxq = rte_zmalloc_socket(NULL,
-				 sizeof(struct ice_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 socket_id);
 
@@ -1240,7 +1240,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 		rxq->crc_len = 0;
 
 	rxq->drop_en = rx_conf->rx_drop_en;
-	rxq->vsi = vsi;
+	rxq->ice_vsi = vsi;
 	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 	rxq->proto_xtr = pf->proto_xtr != NULL ?
 			 pf->proto_xtr[queue_idx] : PROTO_XTR_NONE;
@@ -1274,8 +1274,8 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 	/* Zero all the descriptors in the ring. */
 	memset(rz->addr, 0, ring_size);
 
-	rxq->rx_ring_dma = rz->iova;
-	rxq->rx_ring = rz->addr;
+	rxq->rx_ring_phys_addr = rz->iova;
+	ICE_RX_RING(rxq) = rz->addr;
 
 	/* always reserve more for bulk alloc */
 	len = (uint16_t)(nb_desc + ICE_RX_MAX_BURST);
@@ -1287,7 +1287,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate the software ring. */
 	rxq->sw_ring = rte_zmalloc_socket(NULL,
-					  sizeof(struct ice_rx_entry) * tlen,
+					  sizeof(struct ci_rx_entry) * tlen,
 					  RTE_CACHE_LINE_SIZE,
 					  socket_id);
 	if (!rxq->sw_ring) {
@@ -1324,7 +1324,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 void
 ice_rx_queue_release(void *rxq)
 {
-	struct ice_rx_queue *q = (struct ice_rx_queue *)rxq;
+	struct ci_rx_queue *q = (struct ci_rx_queue *)rxq;
 
 	if (!q) {
 		PMD_DRV_LOG(DEBUG, "Pointer to rxq is NULL");
@@ -1548,7 +1548,7 @@ void
 ice_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 		 struct rte_eth_rxq_info *qinfo)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -1586,11 +1586,11 @@ ice_rx_queue_count(void *rx_queue)
 {
 #define ICE_RXQ_SCAN_INTERVAL 4
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	while ((desc < rxq->nb_rx_desc) &&
 	       rte_le_to_cpu_16(rxdp->wb.status_error0) &
 	       (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)) {
@@ -1602,8 +1602,8 @@ ice_rx_queue_count(void *rx_queue)
 		desc += ICE_RXQ_SCAN_INTERVAL;
 		rxdp += ICE_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
-				 desc - rxq->nb_rx_desc]);
+			rxdp = ICE_RX_RING_PTR(rxq,
+					rxq->rx_tail + desc - rxq->nb_rx_desc);
 	}
 
 	return desc;
@@ -1695,25 +1695,25 @@ ice_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union ice_rx_flex_desc *rxdp)
 #define ICE_PTP_TS_VALID 0x1
 
 static inline int
-ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
+ice_rx_scan_hw_ring(struct ci_rx_queue *rxq)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t stat_err0;
 	uint16_t pkt_len, hdr_len;
 	int32_t s[ICE_LOOK_AHEAD], nb_dd;
 	int32_t i, j, nb_rx = 0;
 	uint64_t pkt_flags = 0;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 #endif
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -1843,7 +1843,7 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 }
 
 static inline uint16_t
-ice_rx_fill_from_stage(struct ice_rx_queue *rxq,
+ice_rx_fill_from_stage(struct ci_rx_queue *rxq,
 		       struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts)
 {
@@ -1862,10 +1862,10 @@ ice_rx_fill_from_stage(struct ice_rx_queue *rxq,
 }
 
 static inline int
-ice_rx_alloc_bufs(struct ice_rx_queue *rxq)
+ice_rx_alloc_bufs(struct ci_rx_queue *rxq)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx, i;
 	uint64_t dma_addr;
@@ -1894,7 +1894,7 @@ ice_rx_alloc_bufs(struct ice_rx_queue *rxq)
 		}
 	}
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = ICE_RX_RING_PTR(rxq, alloc_idx);
 	for (i = 0; i < rxq->rx_free_thresh; i++) {
 		if (likely(i < (rxq->rx_free_thresh - 1)))
 			/* Prefetch next mbuf */
@@ -1933,7 +1933,7 @@ ice_rx_alloc_bufs(struct ice_rx_queue *rxq)
 static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = (struct ice_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	uint16_t nb_rx = 0;
 
 	if (!nb_pkts)
@@ -1951,7 +1951,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		if (ice_rx_alloc_bufs(rxq) != 0) {
 			uint16_t i, j;
 
-			rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed +=
+			rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed +=
 				rxq->rx_free_thresh;
 			PMD_RX_LOG(DEBUG, "Rx mbuf alloc failed for "
 				   "port_id=%u, queue_id=%u",
@@ -2006,12 +2006,12 @@ ice_recv_scattered_pkts(void *rx_queue,
 			struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
-	volatile union ice_rx_flex_desc *rx_ring = rxq->rx_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ice_rx_flex_desc *rx_ring = ICE_RX_RING(rxq);
 	volatile union ice_rx_flex_desc *rxdp;
 	union ice_rx_flex_desc rxd;
-	struct ice_rx_entry *sw_ring = rxq->sw_ring;
-	struct ice_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb; /* new allocated mbuf */
@@ -2023,13 +2023,13 @@ ice_recv_scattered_pkts(void *rx_queue,
 	uint16_t rx_stat_err0;
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -2050,7 +2050,7 @@ ice_recv_scattered_pkts(void *rx_queue,
 		/* allocate mbuf */
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
+			rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
 			break;
 		}
 		rxd = *rxdp; /* copy descriptor in ring to temp variable*/
@@ -2319,7 +2319,7 @@ int
 ice_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint32_t desc;
 
 	if (unlikely(offset >= rxq->nb_rx_desc))
@@ -2332,7 +2332,7 @@ ice_rx_descriptor_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = ICE_RX_RING_PTR(rxq, desc);
 	if (rte_le_to_cpu_16(rxdp->wb.status_error0) &
 	    (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S))
 		return RTE_ETH_RX_DESC_DONE;
@@ -2459,7 +2459,7 @@ ice_fdir_setup_tx_resources(struct ice_pf *pf)
 int
 ice_fdir_setup_rx_resources(struct ice_pf *pf)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz = NULL;
 	uint32_t ring_size;
 	struct rte_eth_dev *dev;
@@ -2473,7 +2473,7 @@ ice_fdir_setup_rx_resources(struct ice_pf *pf)
 
 	/* Allocate the RX queue data structure. */
 	rxq = rte_zmalloc_socket("ice fdir rx queue",
-				 sizeof(struct ice_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 SOCKET_ID_ANY);
 	if (!rxq) {
@@ -2499,12 +2499,12 @@ ice_fdir_setup_rx_resources(struct ice_pf *pf)
 	rxq->nb_rx_desc = ICE_FDIR_NUM_RX_DESC;
 	rxq->queue_id = ICE_FDIR_QUEUE_ID;
 	rxq->reg_idx = pf->fdir.fdir_vsi->base_queue;
-	rxq->vsi = pf->fdir.fdir_vsi;
+	rxq->ice_vsi = pf->fdir.fdir_vsi;
 
-	rxq->rx_ring_dma = rz->iova;
+	rxq->rx_ring_phys_addr = rz->iova;
 	memset(rz->addr, 0, ICE_FDIR_NUM_RX_DESC *
 	       sizeof(union ice_32byte_rx_desc));
-	rxq->rx_ring = (union ice_rx_flex_desc *)rz->addr;
+	ICE_RX_RING(rxq) = (union ice_rx_flex_desc *)rz->addr;
 
 	/*
 	 * Don't need to allocate software ring and reset for the fdir
@@ -2523,12 +2523,12 @@ ice_recv_pkts(void *rx_queue,
 	      struct rte_mbuf **rx_pkts,
 	      uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
-	volatile union ice_rx_flex_desc *rx_ring = rxq->rx_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ice_rx_flex_desc *rx_ring = ICE_RX_RING(rxq);
 	volatile union ice_rx_flex_desc *rxdp;
 	union ice_rx_flex_desc rxd;
-	struct ice_rx_entry *sw_ring = rxq->sw_ring;
-	struct ice_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *nmb; /* new allocated mbuf */
 	struct rte_mbuf *nmb_pay; /* new allocated payload mbuf */
 	struct rte_mbuf *rxm; /* pointer to store old mbuf in SW ring */
@@ -2540,13 +2540,13 @@ ice_recv_pkts(void *rx_queue,
 	uint16_t rx_stat_err0;
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -2567,7 +2567,7 @@ ice_recv_pkts(void *rx_queue,
 		/* allocate header mbuf */
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
+			rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
 			break;
 		}
 
@@ -2594,7 +2594,7 @@ ice_recv_pkts(void *rx_queue,
 			/* allocate payload mbuf */
 			nmb_pay = rte_mbuf_raw_alloc(rxq->rxseg[1].mp);
 			if (unlikely(!nmb_pay)) {
-				rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
+				rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
 				rxe->mbuf = NULL;
 				nb_hold--;
 				if (unlikely(rx_id == 0))
@@ -3472,7 +3472,7 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 	struct ice_adapter *ad =
 		ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 #ifdef RTE_ARCH_X86
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int i;
 	int rx_check_ret = -1;
 
@@ -4634,7 +4634,7 @@ ice_set_default_ptype_table(struct rte_eth_dev *dev)
  * tx queue
  */
 static inline int
-ice_check_fdir_programming_status(struct ice_rx_queue *rxq)
+ice_check_fdir_programming_status(struct ci_rx_queue *rxq)
 {
 	volatile union ice_32byte_rx_desc *rxdp;
 	uint64_t qword1;
@@ -4644,7 +4644,7 @@ ice_check_fdir_programming_status(struct ice_rx_queue *rxq)
 	int ret = -EAGAIN;
 
 	rxdp = (volatile union ice_32byte_rx_desc *)
-		(&rxq->rx_ring[rxq->rx_tail]);
+			ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
 	rx_status = (qword1 & ICE_RXD_QW1_STATUS_M)
 			>> ICE_RXD_QW1_STATUS_S;
@@ -4689,7 +4689,7 @@ int
 ice_fdir_programming(struct ice_pf *pf, struct ice_fltr_desc *fdir_desc)
 {
 	struct ci_tx_queue *txq = pf->fdir.txq;
-	struct ice_rx_queue *rxq = pf->fdir.rxq;
+	struct ci_rx_queue *rxq = pf->fdir.rxq;
 	volatile struct ice_fltr_desc *fdirdp;
 	volatile struct ice_tx_desc *txdp;
 	uint32_t td_cmd;
diff --git a/drivers/net/intel/ice/ice_rxtx.h b/drivers/net/intel/ice/ice_rxtx.h
index 276d40b57f..1a39770d7d 100644
--- a/drivers/net/intel/ice/ice_rxtx.h
+++ b/drivers/net/intel/ice/ice_rxtx.h
@@ -5,6 +5,7 @@
 #ifndef _ICE_RXTX_H_
 #define _ICE_RXTX_H_
 
+#include "../common/rx.h"
 #include "../common/tx.h"
 #include "ice_ethdev.h"
 
@@ -14,21 +15,28 @@
 #define ICE_DMA_MEM_ALIGN    4096
 #define ICE_RING_BASE_ALIGN  128
 
-#define ICE_RX_MAX_BURST 32
+#define ICE_RX_MAX_BURST CI_RX_MAX_BURST
 #define ICE_TX_MAX_BURST 32
 
 /* Maximal number of segments to split. */
-#define ICE_RX_MAX_NSEG 2
+#define ICE_RX_MAX_NSEG CI_RX_MAX_NSEG
 
 #define ICE_CHK_Q_ENA_COUNT        100
 #define ICE_CHK_Q_ENA_INTERVAL_US  100
 
 #ifdef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 #define ice_rx_flex_desc ice_16b_rx_flex_desc
+#define ICE_RX_RING(rxq) \
+	((rxq)->ice_rx_16b_ring)
 #else
 #define ice_rx_flex_desc ice_32b_rx_flex_desc
+#define ICE_RX_RING(rxq) \
+	((rxq)->ice_rx_32b_ring)
 #endif
 
+#define ICE_RX_RING_PTR(rxq, entry) \
+	(ICE_RX_RING(rxq) + (entry))
+
 #define ICE_SUPPORT_CHAIN_NUM 5
 
 #define ICE_TD_CMD                      ICE_TX_DESC_CMD_EOP
@@ -78,74 +86,16 @@ extern int ice_timestamp_dynfield_offset;
 
 #define ICE_TX_MTU_SEG_MAX	8
 
-typedef void (*ice_rx_release_mbufs_t)(struct ice_rx_queue *rxq);
-typedef void (*ice_rxd_to_pkt_fields_t)(struct ice_rx_queue *rxq,
+typedef void (*ice_rxd_to_pkt_fields_t)(struct ci_rx_queue *rxq,
 					struct rte_mbuf *mb,
 					volatile union ice_rx_flex_desc *rxdp);
 
-struct ice_rx_entry {
-	struct rte_mbuf *mbuf;
-};
-
 enum ice_rx_dtype {
 	ICE_RX_DTYPE_NO_SPLIT       = 0,
 	ICE_RX_DTYPE_HEADER_SPLIT   = 1,
 	ICE_RX_DTYPE_SPLIT_ALWAYS   = 2,
 };
 
-struct ice_rx_queue {
-	struct rte_mempool *mp; /* mbuf pool to populate RX ring */
-	volatile union ice_rx_flex_desc *rx_ring;/* RX ring virtual address */
-	rte_iova_t rx_ring_dma; /* RX ring DMA address */
-	struct ice_rx_entry *sw_ring; /* address of RX soft ring */
-	uint16_t nb_rx_desc; /* number of RX descriptors */
-	uint16_t rx_free_thresh; /* max free RX desc to hold */
-	uint16_t rx_tail; /* current value of tail */
-	uint16_t nb_rx_hold; /* number of held free RX desc */
-	struct rte_mbuf *pkt_first_seg; /**< first segment of current packet */
-	struct rte_mbuf *pkt_last_seg; /**< last segment of current packet */
-	uint16_t rx_nb_avail; /**< number of staged packets ready */
-	uint16_t rx_next_avail; /**< index of next staged packets */
-	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	struct rte_mbuf fake_mbuf; /**< dummy mbuf */
-	struct rte_mbuf *rx_stage[ICE_RX_MAX_BURST * 2];
-
-	uint16_t rxrearm_nb;	/**< number of remaining to be re-armed */
-	uint16_t rxrearm_start;	/**< the idx we start the re-arming from */
-	uint64_t mbuf_initializer; /**< value to init mbufs */
-
-	uint16_t port_id; /* device port ID */
-	uint8_t crc_len; /* 0 if CRC stripped, 4 otherwise */
-	uint8_t fdir_enabled; /* 0 if FDIR disabled, 1 when enabled */
-	uint16_t queue_id; /* RX queue index */
-	uint16_t reg_idx; /* RX queue register index */
-	uint8_t drop_en; /* if not 0, set register bit */
-	volatile uint8_t *qrx_tail; /* register address of tail */
-	struct ice_vsi *vsi; /* the VSI this queue belongs to */
-	uint16_t rx_buf_len; /* The packet buffer size */
-	uint16_t rx_hdr_len; /* The header buffer size */
-	uint16_t max_pkt_len; /* Maximum packet length */
-	bool q_set; /* indicate if rx queue has been configured */
-	bool rx_deferred_start; /* don't start this queue in dev start */
-	uint8_t proto_xtr; /* Protocol extraction from flexible descriptor */
-	int xtr_field_offs; /*Protocol extraction matedata offset*/
-	uint64_t xtr_ol_flag; /* Protocol extraction offload flag */
-	uint32_t rxdid; /* Receive Flex Descriptor profile ID */
-	ice_rx_release_mbufs_t rx_rel_mbufs;
-	uint64_t offloads;
-	uint32_t time_high;
-	uint32_t hw_register_set;
-	const struct rte_memzone *mz;
-	uint32_t hw_time_high; /* high 32 bits of timestamp */
-	uint32_t hw_time_low; /* low 32 bits of timestamp */
-	uint64_t hw_time_update; /* SW time of HW record updating */
-	struct ice_rx_entry *sw_split_buf;
-	/* address of temp buffer for RX split mbufs */
-	struct rte_eth_rxseg_split rxseg[ICE_RX_MAX_NSEG];
-	uint32_t rxseg_nb;
-	bool ts_enable; /* if rxq timestamp is enabled */
-};
-
 /* Offload features */
 union ice_tx_offload {
 	uint64_t data;
@@ -249,12 +199,12 @@ int ice_tx_descriptor_status(void *tx_queue, uint16_t offset);
 void ice_set_default_ptype_table(struct rte_eth_dev *dev);
 const uint32_t *ice_dev_supported_ptypes_get(struct rte_eth_dev *dev,
 					     size_t *no_of_elements);
-void ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq,
+void ice_select_rxd_to_pkt_fields_handler(struct ci_rx_queue *rxq,
 					  uint32_t rxdid);
 
 int ice_rx_vec_dev_check(struct rte_eth_dev *dev);
 int ice_tx_vec_dev_check(struct rte_eth_dev *dev);
-int ice_rxq_vec_setup(struct ice_rx_queue *rxq);
+int ice_rxq_vec_setup(struct ci_rx_queue *rxq);
 int ice_txq_vec_setup(struct ci_tx_queue *txq);
 uint16_t ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   uint16_t nb_pkts);
@@ -299,7 +249,7 @@ int ice_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc);
 #define FDIR_PARSING_ENABLE_PER_QUEUE(ad, on) do { \
 	int i; \
 	for (i = 0; i < (ad)->pf.dev_data->nb_rx_queues; i++) { \
-		struct ice_rx_queue *rxq = (ad)->pf.dev_data->rx_queues[i]; \
+		struct ci_rx_queue *rxq = (ad)->pf.dev_data->rx_queues[i]; \
 		if (!rxq) \
 			continue; \
 		rxq->fdir_enabled = on; \
diff --git a/drivers/net/intel/ice/ice_rxtx_common_avx.h b/drivers/net/intel/ice/ice_rxtx_common_avx.h
index c62e60c70e..7209c902db 100644
--- a/drivers/net/intel/ice/ice_rxtx_common_avx.h
+++ b/drivers/net/intel/ice/ice_rxtx_common_avx.h
@@ -9,14 +9,14 @@
 
 #ifdef __AVX2__
 static __rte_always_inline void
-ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
+ice_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
index 0c54b325c6..f4555369a2 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
@@ -8,7 +8,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-ice_rxq_rearm(struct ice_rx_queue *rxq)
+ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	ice_rxq_rearm_common(rxq, false);
 }
@@ -33,17 +33,17 @@ ice_flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_ice_recv_raw_pkts_vec_avx2(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts, uint8_t *split_packet,
 			    bool offload)
 {
 #define ICE_DESCS_PER_LOOP_AVX 8
 
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct ice_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union ice_rx_flex_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ice_rx_flex_desc *rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 
 	rte_prefetch0(rxdp);
@@ -445,7 +445,7 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			 * needs to load 2nd 16B of each desc for RSS hash parsing,
 			 * will cause performance drop to get into this context.
 			 */
-			if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+			if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
 					RTE_ETH_RX_OFFLOAD_RSS_HASH) {
 				/* load bottom half of every 32B desc */
 				const __m128i raw_desc_bh7 = _mm_load_si128
@@ -694,7 +694,7 @@ static __rte_always_inline uint16_t
 ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				  uint16_t nb_pkts, bool offload)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
index bd49be07c9..6eea74d703 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
@@ -10,7 +10,7 @@
 #define ICE_DESCS_PER_LOOP_AVX 8
 
 static __rte_always_inline void
-ice_rxq_rearm(struct ice_rx_queue *rxq)
+ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	ice_rxq_rearm_common(rxq, true);
 }
@@ -35,17 +35,17 @@ ice_flex_rxd_to_fdir_flags_vec_avx512(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
+_ice_recv_raw_pkts_vec_avx512(struct ci_rx_queue *rxq,
 			      struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts,
 			      uint8_t *split_packet,
 			      bool do_offload)
 {
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct ice_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union ice_rx_flex_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ice_rx_flex_desc *rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -467,7 +467,7 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
 			 * needs to load 2nd 16B of each desc for RSS hash parsing,
 			 * will cause performance drop to get into this context.
 			 */
-			if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+			if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
 					RTE_ETH_RX_OFFLOAD_RSS_HASH) {
 				/* load bottom half of every 32B desc */
 				const __m128i raw_desc_bh7 = _mm_load_si128
@@ -723,7 +723,7 @@ static uint16_t
 ice_recv_scattered_burst_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				    uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -765,7 +765,7 @@ ice_recv_scattered_burst_vec_avx512_offload(void *rx_queue,
 					    struct rte_mbuf **rx_pkts,
 					    uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_common.h b/drivers/net/intel/ice/ice_rxtx_vec_common.h
index 7933c26366..9430a99ba5 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_common.h
+++ b/drivers/net/intel/ice/ice_rxtx_vec_common.h
@@ -17,7 +17,7 @@ ice_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
 }
 
 static inline void
-_ice_rx_queue_release_mbufs_vec(struct ice_rx_queue *rxq)
+_ice_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	const unsigned int mask = rxq->nb_rx_desc - 1;
 	unsigned int i;
@@ -79,7 +79,7 @@ _ice_rx_queue_release_mbufs_vec(struct ice_rx_queue *rxq)
 #define ICE_VECTOR_OFFLOAD_PATH	1
 
 static inline int
-ice_rx_vec_queue_default(struct ice_rx_queue *rxq)
+ice_rx_vec_queue_default(struct ci_rx_queue *rxq)
 {
 	if (!rxq)
 		return -1;
@@ -119,7 +119,7 @@ static inline int
 ice_rx_vec_dev_check_default(struct rte_eth_dev *dev)
 {
 	int i;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int ret = 0;
 	int result = 0;
 
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_sse.c b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
index 97f05ba45e..dc9d37226a 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
@@ -26,18 +26,18 @@ ice_flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 }
 
 static inline void
-ice_rxq_rearm(struct ice_rx_queue *rxq)
+ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 					  RTE_PKTMBUF_HEADROOM);
 	__m128i dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -105,7 +105,7 @@ ice_rxq_rearm(struct ice_rx_queue *rxq)
 }
 
 static inline void
-ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
+ice_rx_desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4],
 			 struct rte_mbuf **rx_pkts)
 {
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -301,15 +301,15 @@ ice_rx_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a ICE_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_ice_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 	__m128i crc_adjust = _mm_set_epi16
 				(0, 0, 0,       /* ignore non-length fields */
 				 -rxq->crc_len, /* sub crc on data_len */
@@ -361,7 +361,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -482,7 +482,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		 * needs to load 2nd 16B of each desc for RSS hash parsing,
 		 * will cause performance drop to get into this context.
 		 */
-		if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+		if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
 				RTE_ETH_RX_OFFLOAD_RSS_HASH) {
 			/* load bottom half of every 32B desc */
 			const __m128i raw_desc_bh3 =
@@ -608,7 +608,7 @@ static uint16_t
 ice_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -779,7 +779,7 @@ ice_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 int __rte_cold
-ice_rxq_vec_setup(struct ice_rx_queue *rxq)
+ice_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	if (!rxq)
 		return -1;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v2 06/13] net/iavf: use the common Rx queue structure
  2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (3 preceding siblings ...)
  2025-05-12 10:58   ` [PATCH v2 05/13] net/ice: " Anatoly Burakov
@ 2025-05-12 10:58   ` Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
                     ` (6 subsequent siblings)
  11 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 10:58 UTC (permalink / raw)
  To: dev, Bruce Richardson, Vladimir Medvedkin, Ian Stokes

Make the iavf driver use the new common Rx queue structure.

Because the iavf driver supports both 16-byte and 32-byte descriptor
formats (controlled by the RTE_LIBRTE_IAVF_16BYTE_RX_DESC define), the
common queue structure has to account for that: it holds ring pointers
for both formats, and iavf picks the actual descriptor format at compile
time using the above define. Direct accesses to the Rx ring are now
replaced with macro accesses that take the descriptor size into account.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
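Note: as a quick illustration of the access pattern described above, here
is a minimal standalone sketch. The demo_* and DEMO_* names are purely
illustrative and do not exist in the driver; the real code uses
ci_rx_queue and the IAVF_RX_RING()/IAVF_RX_RING_PTR() macros that appear
in the diff below.

#include <stdint.h>
#include <stdio.h>

union demo_16b_rx_desc { uint64_t qword[2]; };  /* stand-in for a 16B descriptor */
union demo_32b_rx_desc { uint64_t qword[4]; };  /* stand-in for a 32B descriptor */

struct demo_rx_queue {
	union { /* one ring pointer per supported descriptor format */
		volatile union demo_16b_rx_desc *rx_16b_ring;
		volatile union demo_32b_rx_desc *rx_32b_ring;
	};
	uint16_t rx_tail;
};

/* the driver selects one member at compile time via its own define */
#ifdef DEMO_USE_16BYTE_RX_DESC
#define demo_rx_desc demo_16b_rx_desc
#define DEMO_RX_RING(rxq) ((rxq)->rx_16b_ring)
#else
#define demo_rx_desc demo_32b_rx_desc
#define DEMO_RX_RING(rxq) ((rxq)->rx_32b_ring)
#endif
#define DEMO_RX_RING_PTR(rxq, idx) (DEMO_RX_RING(rxq) + (idx))

int main(void)
{
	static union demo_rx_desc ring[4]; /* plays the role of the descriptor ring memzone */
	struct demo_rx_queue rxq = { .rx_tail = 2 };

	DEMO_RX_RING(&rxq) = ring;
	volatile union demo_rx_desc *rxdp = DEMO_RX_RING_PTR(&rxq, rxq.rx_tail);

	printf("descriptor size: %zu bytes\n", sizeof(*rxdp));
	return 0;
}
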
 drivers/net/intel/common/rx.h                 |  12 ++
 drivers/net/intel/iavf/iavf.h                 |   4 +-
 drivers/net/intel/iavf/iavf_ethdev.c          |  12 +-
 drivers/net/intel/iavf/iavf_rxtx.c            | 192 +++++++++---------
 drivers/net/intel/iavf/iavf_rxtx.h            |  76 ++-----
 drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   |  24 +--
 drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c |  22 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_common.h |  27 ++-
 drivers/net/intel/iavf/iavf_rxtx_vec_neon.c   |  12 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    |  46 ++---
 drivers/net/intel/iavf/iavf_vchnl.c           |   6 +-
 11 files changed, 198 insertions(+), 235 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index 9a691971bc..2d9328ae89 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -38,6 +38,8 @@ struct ci_rx_queue {
 		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
 		volatile union ice_16b_rx_flex_desc *ice_rx_16b_ring;
 		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
+		volatile union iavf_16byte_rx_desc *iavf_rx_16b_ring;
+		volatile union iavf_32byte_rx_desc *iavf_rx_32b_ring;
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
@@ -80,6 +82,7 @@ struct ci_rx_queue {
 	union { /* the VSI this queue belongs to */
 		struct i40e_vsi *i40e_vsi;
 		struct ice_vsi *ice_vsi;
+		struct iavf_vsi *iavf_vsi;
 	};
 	const struct rte_memzone *mz;
 	union {
@@ -108,6 +111,15 @@ struct ci_rx_queue {
 			uint32_t hw_time_low; /* low 32 bits of timestamp */
 			bool ts_enable; /* if rxq timestamp is enabled */
 		};
+		struct { /* iavf specific values */
+			const struct iavf_rxq_ops *ops; /**< queue ops */
+			struct iavf_rx_queue_stats *stats; /**< per-queue stats */
+			uint64_t phc_time; /**< HW timestamp */
+			uint8_t rel_mbufs_type; /**< type of release mbuf function */
+			uint8_t rx_flags; /**< Rx VLAN tag location flags */
+#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG1     BIT(0)
+#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2   BIT(1)
+		};
 	};
 };
 
diff --git a/drivers/net/intel/iavf/iavf.h b/drivers/net/intel/iavf/iavf.h
index 97e6b243fb..f81c939c96 100644
--- a/drivers/net/intel/iavf/iavf.h
+++ b/drivers/net/intel/iavf/iavf.h
@@ -97,7 +97,7 @@
 #define IAVF_L2TPV2_FLAGS_LEN	0x4000
 
 struct iavf_adapter;
-struct iavf_rx_queue;
+struct ci_rx_queue;
 struct ci_tx_queue;
 
 
@@ -555,7 +555,7 @@ int iavf_ipsec_crypto_request(struct iavf_adapter *adapter,
 		uint8_t *resp_msg, size_t resp_msg_len);
 extern const struct rte_tm_ops iavf_tm_ops;
 int iavf_get_ptp_cap(struct iavf_adapter *adapter);
-int iavf_get_phc_time(struct iavf_rx_queue *rxq);
+int iavf_get_phc_time(struct ci_rx_queue *rxq);
 int iavf_flow_sub(struct iavf_adapter *adapter,
 		  struct iavf_fsub_conf *filter);
 int iavf_flow_unsub(struct iavf_adapter *adapter,
diff --git a/drivers/net/intel/iavf/iavf_ethdev.c b/drivers/net/intel/iavf/iavf_ethdev.c
index 5babd587b3..4e843a3532 100644
--- a/drivers/net/intel/iavf/iavf_ethdev.c
+++ b/drivers/net/intel/iavf/iavf_ethdev.c
@@ -728,7 +728,7 @@ iavf_dev_configure(struct rte_eth_dev *dev)
 }
 
 static int
-iavf_init_rxq(struct rte_eth_dev *dev, struct iavf_rx_queue *rxq)
+iavf_init_rxq(struct rte_eth_dev *dev, struct ci_rx_queue *rxq)
 {
 	struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct rte_eth_dev_data *dev_data = dev->data;
@@ -779,8 +779,8 @@ iavf_init_rxq(struct rte_eth_dev *dev, struct iavf_rx_queue *rxq)
 static int
 iavf_init_queues(struct rte_eth_dev *dev)
 {
-	struct iavf_rx_queue **rxq =
-		(struct iavf_rx_queue **)dev->data->rx_queues;
+	struct ci_rx_queue **rxq =
+		(struct ci_rx_queue **)dev->data->rx_queues;
 	int i, ret = IAVF_SUCCESS;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -955,7 +955,7 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev,
 static int
 iavf_start_queues(struct rte_eth_dev *dev)
 {
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int i;
 	uint16_t nb_txq, nb_rxq;
@@ -1867,9 +1867,9 @@ iavf_dev_update_ipsec_xstats(struct rte_eth_dev *ethdev,
 {
 	uint16_t idx;
 	for (idx = 0; idx < ethdev->data->nb_rx_queues; idx++) {
-		struct iavf_rx_queue *rxq;
+		struct ci_rx_queue *rxq;
 		struct iavf_ipsec_crypto_stats *stats;
-		rxq = (struct iavf_rx_queue *)ethdev->data->rx_queues[idx];
+		rxq = (struct ci_rx_queue *)ethdev->data->rx_queues[idx];
 		stats = &rxq->stats->ipsec_crypto;
 		ips->icount += stats->icount;
 		ips->ibytes += stats->ibytes;
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index d23d2df807..a9ce4b55d9 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -128,12 +128,12 @@ iavf_monitor_callback(const uint64_t value,
 int
 iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile union iavf_rx_desc *rxdp;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = IAVF_RX_RING_PTR(rxq, desc);
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.qword1.status_error_len;
 
@@ -222,7 +222,7 @@ check_tx_vec_allow(struct ci_tx_queue *txq)
 }
 
 static inline bool
-check_rx_bulk_allow(struct iavf_rx_queue *rxq)
+check_rx_bulk_allow(struct ci_rx_queue *rxq)
 {
 	int ret = true;
 
@@ -243,7 +243,7 @@ check_rx_bulk_allow(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-reset_rx_queue(struct iavf_rx_queue *rxq)
+reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	uint16_t len;
 	uint32_t i;
@@ -254,12 +254,12 @@ reset_rx_queue(struct iavf_rx_queue *rxq)
 	len = rxq->nb_rx_desc + IAVF_RX_MAX_BURST;
 
 	for (i = 0; i < len * sizeof(union iavf_rx_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)IAVF_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 
 	for (i = 0; i < IAVF_RX_MAX_BURST; i++)
-		rxq->sw_ring[rxq->nb_rx_desc + i] = &rxq->fake_mbuf;
+		rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;
 
 	/* for rx bulk */
 	rxq->rx_nb_avail = 0;
@@ -315,7 +315,7 @@ reset_tx_queue(struct ci_tx_queue *txq)
 }
 
 static int
-alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
+alloc_rxq_mbufs(struct ci_rx_queue *rxq)
 {
 	volatile union iavf_rx_desc *rxd;
 	struct rte_mbuf *mbuf = NULL;
@@ -326,8 +326,8 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		mbuf = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!mbuf)) {
 			for (j = 0; j < i; j++) {
-				rte_pktmbuf_free_seg(rxq->sw_ring[j]);
-				rxq->sw_ring[j] = NULL;
+				rte_pktmbuf_free_seg(rxq->sw_ring[j].mbuf);
+				rxq->sw_ring[j].mbuf = NULL;
 			}
 			PMD_DRV_LOG(ERR, "Failed to allocate mbuf for RX");
 			return -ENOMEM;
@@ -342,7 +342,7 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
-		rxd = &rxq->rx_ring[i];
+		rxd = IAVF_RX_RING_PTR(rxq, i);
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
@@ -350,14 +350,14 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		rxd->read.rsvd2 = 0;
 #endif
 
-		rxq->sw_ring[i] = mbuf;
+		rxq->sw_ring[i].mbuf = mbuf;
 	}
 
 	return 0;
 }
 
 static inline void
-release_rxq_mbufs(struct iavf_rx_queue *rxq)
+release_rxq_mbufs(struct ci_rx_queue *rxq)
 {
 	uint16_t i;
 
@@ -365,9 +365,9 @@ release_rxq_mbufs(struct iavf_rx_queue *rxq)
 		return;
 
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
-		if (rxq->sw_ring[i]) {
-			rte_pktmbuf_free_seg(rxq->sw_ring[i]);
-			rxq->sw_ring[i] = NULL;
+		if (rxq->sw_ring[i].mbuf) {
+			rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
+			rxq->sw_ring[i].mbuf = NULL;
 		}
 	}
 
@@ -395,7 +395,7 @@ struct iavf_rxq_ops iavf_rxq_release_mbufs_ops[] = {
 };
 
 static inline void
-iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct iavf_rx_queue *rxq,
+iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ci_rx_queue *rxq,
 				    struct rte_mbuf *mb,
 				    volatile union iavf_rx_flex_desc *rxdp)
 {
@@ -420,7 +420,7 @@ iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct iavf_rx_queue *rxq,
 }
 
 static inline void
-iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct iavf_rx_queue *rxq,
+iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct ci_rx_queue *rxq,
 				       struct rte_mbuf *mb,
 				       volatile union iavf_rx_flex_desc *rxdp)
 {
@@ -462,7 +462,7 @@ iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct iavf_rx_queue *rxq,
 }
 
 static inline void
-iavf_rxd_to_pkt_fields_by_comms_aux_v2(struct iavf_rx_queue *rxq,
+iavf_rxd_to_pkt_fields_by_comms_aux_v2(struct ci_rx_queue *rxq,
 				       struct rte_mbuf *mb,
 				       volatile union iavf_rx_flex_desc *rxdp)
 {
@@ -517,7 +517,7 @@ iavf_rxd_to_pkt_fields_t rxd_to_pkt_fields_ops[IAVF_RXDID_LAST + 1] = {
 };
 
 static void
-iavf_select_rxd_to_pkt_fields_handler(struct iavf_rx_queue *rxq, uint32_t rxdid)
+iavf_select_rxd_to_pkt_fields_handler(struct ci_rx_queue *rxq, uint32_t rxdid)
 {
 	rxq->rxdid = rxdid;
 
@@ -572,7 +572,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	struct iavf_info *vf =
 		IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 	struct iavf_vsi *vsi = &vf->vsi;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *mz;
 	uint32_t ring_size;
 	uint8_t proto_xtr;
@@ -610,7 +610,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 
 	/* Allocate the rx queue data structure */
 	rxq = rte_zmalloc_socket("iavf rxq",
-				 sizeof(struct iavf_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 socket_id);
 	if (!rxq) {
@@ -668,7 +668,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	rxq->port_id = dev->data->port_id;
 	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 	rxq->rx_hdr_len = 0;
-	rxq->vsi = vsi;
+	rxq->iavf_vsi = vsi;
 	rxq->offloads = offloads;
 
 	if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
@@ -713,7 +713,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	/* Zero all the descriptors in the ring. */
 	memset(mz->addr, 0, ring_size);
 	rxq->rx_ring_phys_addr = mz->iova;
-	rxq->rx_ring = (union iavf_rx_desc *)mz->addr;
+	IAVF_RX_RING(rxq) = (union iavf_rx_desc *)mz->addr;
 
 	rxq->mz = mz;
 	reset_rx_queue(rxq);
@@ -905,7 +905,7 @@ iavf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 	struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err = 0;
 
 	PMD_DRV_FUNC_TRACE();
@@ -997,7 +997,7 @@ iavf_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	struct iavf_adapter *adapter =
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 
 	PMD_DRV_FUNC_TRACE();
@@ -1060,7 +1060,7 @@ iavf_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 void
 iavf_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 {
-	struct iavf_rx_queue *q = dev->data->rx_queues[qid];
+	struct ci_rx_queue *q = dev->data->rx_queues[qid];
 
 	if (!q)
 		return;
@@ -1089,7 +1089,7 @@ iavf_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 static void
 iavf_reset_queues(struct rte_eth_dev *dev)
 {
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int i;
 
@@ -1375,7 +1375,7 @@ iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
  * from the hardware point of view.
  */
 static inline void
-iavf_update_rx_tail(struct iavf_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
+iavf_update_rx_tail(struct ci_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
 {
 	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
 
@@ -1397,9 +1397,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	volatile union iavf_rx_desc *rx_ring;
 	volatile union iavf_rx_desc *rxdp;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	union iavf_rx_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_eth_dev *dev;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
@@ -1416,8 +1416,8 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
-	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	rx_ring = IAVF_RX_RING(rxq);
+	ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -1442,13 +1442,13 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (unlikely(rx_id == rxq->nb_rx_desc))
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1456,9 +1456,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 		rxdp->read.hdr_addr = 0;
@@ -1506,9 +1506,9 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 {
 	volatile union iavf_rx_desc *rx_ring;
 	volatile union iavf_rx_flex_desc *rxdp;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	union iavf_rx_flex_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_eth_dev *dev;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
@@ -1525,8 +1525,8 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
-	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	rx_ring = IAVF_RX_RING(rxq);
+	ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -1559,13 +1559,13 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (unlikely(rx_id == rxq->nb_rx_desc))
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1573,9 +1573,9 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 		rxdp->read.hdr_addr = 0;
@@ -1629,9 +1629,9 @@ uint16_t
 iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 				  uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	union iavf_rx_flex_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb, *rxm;
@@ -1643,9 +1643,9 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint64_t pkt_flags;
 	uint64_t ts_ns;
 
-	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union iavf_rx_desc *rx_ring = IAVF_RX_RING(rxq);
 	volatile union iavf_rx_flex_desc *rxdp;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -1678,13 +1678,13 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (rx_id == rxq->nb_rx_desc)
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1692,10 +1692,10 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
 
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 
@@ -1806,9 +1806,9 @@ uint16_t
 iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	union iavf_rx_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb, *rxm;
@@ -1820,9 +1820,9 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
 
-	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union iavf_rx_desc *rx_ring = IAVF_RX_RING(rxq);
 	volatile union iavf_rx_desc *rxdp;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -1847,13 +1847,13 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (rx_id == rxq->nb_rx_desc)
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1861,10 +1861,10 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
 
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 
@@ -1963,12 +1963,12 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 #define IAVF_LOOK_AHEAD 8
 static inline int
-iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
+iavf_rx_scan_hw_ring_flex_rxd(struct ci_rx_queue *rxq,
 			    struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts)
 {
 	volatile union iavf_rx_flex_desc *rxdp;
-	struct rte_mbuf **rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t stat_err0;
 	uint16_t pkt_len;
@@ -1976,10 +1976,10 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 	int32_t i, j, nb_rx = 0;
 	int32_t nb_staged = 0;
 	uint64_t pkt_flags;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 	uint64_t ts_ns;
 
-	rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail];
+	rxdp = (volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -2038,7 +2038,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 					  rxq->rx_tail +
 					  i * IAVF_LOOK_AHEAD + j);
 
-			mb = rxep[j];
+			mb = rxep[j].mbuf;
 			pkt_len = (rte_le_to_cpu_16(rxdp[j].wb.pkt_len) &
 				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
 			mb->data_len = pkt_len;
@@ -2072,11 +2072,11 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 
 			/* Put up to nb_pkts directly into buffers */
 			if ((i + j) < nb_pkts) {
-				rx_pkts[i + j] = rxep[j];
+				rx_pkts[i + j] = rxep[j].mbuf;
 				nb_rx++;
 			} else {
 				/* Stage excess pkts received */
-				rxq->rx_stage[nb_staged] = rxep[j];
+				rxq->rx_stage[nb_staged] = rxep[j].mbuf;
 				nb_staged++;
 			}
 		}
@@ -2090,16 +2090,16 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 
 	/* Clear software ring entries */
 	for (i = 0; i < (nb_rx + nb_staged); i++)
-		rxq->sw_ring[rxq->rx_tail + i] = NULL;
+		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
 
 	return nb_rx;
 }
 
 static inline int
-iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+iavf_rx_scan_hw_ring(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t pkt_len;
 	uint64_t qword1;
@@ -2108,9 +2108,9 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 	int32_t i, j, nb_rx = 0;
 	int32_t nb_staged = 0;
 	uint64_t pkt_flags;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
@@ -2164,7 +2164,7 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 			IAVF_DUMP_RX_DESC(rxq, &rxdp[j],
 					 rxq->rx_tail + i * IAVF_LOOK_AHEAD + j);
 
-			mb = rxep[j];
+			mb = rxep[j].mbuf;
 			qword1 = rte_le_to_cpu_64
 					(rxdp[j].wb.qword1.status_error_len);
 			pkt_len = ((qword1 & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >>
@@ -2190,10 +2190,10 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 
 			/* Put up to nb_pkts directly into buffers */
 			if ((i + j) < nb_pkts) {
-				rx_pkts[i + j] = rxep[j];
+				rx_pkts[i + j] = rxep[j].mbuf;
 				nb_rx++;
 			} else { /* Stage excess pkts received */
-				rxq->rx_stage[nb_staged] = rxep[j];
+				rxq->rx_stage[nb_staged] = rxep[j].mbuf;
 				nb_staged++;
 			}
 		}
@@ -2207,13 +2207,13 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 
 	/* Clear software ring entries */
 	for (i = 0; i < (nb_rx + nb_staged); i++)
-		rxq->sw_ring[rxq->rx_tail + i] = NULL;
+		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
 
 	return nb_rx;
 }
 
 static inline uint16_t
-iavf_rx_fill_from_stage(struct iavf_rx_queue *rxq,
+iavf_rx_fill_from_stage(struct ci_rx_queue *rxq,
 		       struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts)
 {
@@ -2232,10 +2232,10 @@ iavf_rx_fill_from_stage(struct iavf_rx_queue *rxq,
 }
 
 static inline int
-iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
+iavf_rx_alloc_bufs(struct ci_rx_queue *rxq)
 {
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx, i;
 	uint64_t dma_addr;
@@ -2252,13 +2252,13 @@ iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
 		return -ENOMEM;
 	}
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = IAVF_RX_RING_PTR(rxq, alloc_idx);
 	for (i = 0; i < rxq->rx_free_thresh; i++) {
 		if (likely(i < (rxq->rx_free_thresh - 1)))
 			/* Prefetch next mbuf */
-			rte_prefetch0(rxep[i + 1]);
+			rte_prefetch0(rxep[i + 1].mbuf);
 
-		mb = rxep[i];
+		mb = rxep[i].mbuf;
 		rte_mbuf_refcnt_set(mb, 1);
 		mb->next = NULL;
 		mb->data_off = RTE_PKTMBUF_HEADROOM;
@@ -2284,7 +2284,7 @@ iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
 static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = (struct iavf_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	uint16_t nb_rx = 0;
 
 	if (!nb_pkts)
@@ -2312,11 +2312,11 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 			rxq->rx_tail = (uint16_t)(rxq->rx_tail - (nb_rx + nb_staged));
 			for (i = 0, j = rxq->rx_tail; i < nb_rx; i++, j++) {
-				rxq->sw_ring[j] = rx_pkts[i];
+				rxq->sw_ring[j].mbuf = rx_pkts[i];
 				rx_pkts[i] = NULL;
 			}
 			for (i = 0, j = rxq->rx_tail + nb_rx; i < nb_staged; i++, j++) {
-				rxq->sw_ring[j] = rxq->rx_stage[i];
+				rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
 				rx_pkts[i] = NULL;
 			}
 
@@ -3843,13 +3843,13 @@ static uint16_t
 iavf_recv_pkts_no_poll(void *rx_queue, struct rte_mbuf **rx_pkts,
 				uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	enum iavf_rx_burst_type rx_burst_type;
 
-	if (!rxq->vsi || rxq->vsi->adapter->no_poll)
+	if (!rxq->iavf_vsi || rxq->iavf_vsi->adapter->no_poll)
 		return 0;
 
-	rx_burst_type = rxq->vsi->adapter->rx_burst_type;
+	rx_burst_type = rxq->iavf_vsi->adapter->rx_burst_type;
 
 	return iavf_rx_pkt_burst_ops[rx_burst_type].pkt_burst(rx_queue,
 								rx_pkts, nb_pkts);
@@ -3965,7 +3965,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	enum iavf_rx_burst_type rx_burst_type;
 	int no_poll_on_link_down = adapter->devargs.no_poll_on_link_down;
 	int i;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	bool use_flex = true;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -4379,7 +4379,7 @@ void
 iavf_dev_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 		     struct rte_eth_rxq_info *qinfo)
 {
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -4414,11 +4414,11 @@ iavf_dev_rxq_count(void *rx_queue)
 {
 #define IAVF_RXQ_SCAN_INTERVAL 4
 	volatile union iavf_rx_desc *rxdp;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	while ((desc < rxq->nb_rx_desc) &&
 	       ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
@@ -4431,8 +4431,8 @@ iavf_dev_rxq_count(void *rx_queue)
 		desc += IAVF_RXQ_SCAN_INTERVAL;
 		rxdp += IAVF_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
-					desc - rxq->nb_rx_desc]);
+			rxdp = IAVF_RX_RING_PTR(rxq,
+					rxq->rx_tail + desc - rxq->nb_rx_desc);
 	}
 
 	return desc;
@@ -4441,7 +4441,7 @@ iavf_dev_rxq_count(void *rx_queue)
 int
 iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile uint64_t *status;
 	uint64_t mask;
 	uint32_t desc;
@@ -4456,7 +4456,7 @@ iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	status = &rxq->rx_ring[desc].wb.qword1.status_error_len;
+	status = &IAVF_RX_RING_PTR(rxq, desc)->wb.qword1.status_error_len;
 	mask = rte_le_to_cpu_64((1ULL << IAVF_RX_DESC_STATUS_DD_SHIFT)
 		<< IAVF_RXD_QW1_STATUS_SHIFT);
 	if (*status & mask)
diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index 62b5a67c84..c43ddc3c2f 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -17,7 +17,7 @@
 #define IAVF_RING_BASE_ALIGN      128
 
 /* used for Rx Bulk Allocate */
-#define IAVF_RX_MAX_BURST         32
+#define IAVF_RX_MAX_BURST         CI_RX_MAX_BURST
 
 /* Max data buffer size must be 16K - 128 bytes */
 #define IAVF_RX_MAX_DATA_BUF_SIZE (16 * 1024 - 128)
@@ -198,17 +198,24 @@ union iavf_32b_rx_flex_desc {
 #ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 #define iavf_rx_desc iavf_16byte_rx_desc
 #define iavf_rx_flex_desc iavf_16b_rx_flex_desc
+#define IAVF_RX_RING(rxq) \
+	((rxq)->iavf_rx_16b_ring)
 #else
 #define iavf_rx_desc iavf_32byte_rx_desc
 #define iavf_rx_flex_desc iavf_32b_rx_flex_desc
+#define IAVF_RX_RING(rxq) \
+	((rxq)->iavf_rx_32b_ring)
 #endif
 
-typedef void (*iavf_rxd_to_pkt_fields_t)(struct iavf_rx_queue *rxq,
+#define IAVF_RX_RING_PTR(rxq, entry) \
+	(IAVF_RX_RING(rxq) + (entry))
+
+typedef void (*iavf_rxd_to_pkt_fields_t)(struct ci_rx_queue *rxq,
 				struct rte_mbuf *mb,
 				volatile union iavf_rx_flex_desc *rxdp);
 
 struct iavf_rxq_ops {
-	void (*release_mbufs)(struct iavf_rx_queue *rxq);
+	void (*release_mbufs)(struct ci_rx_queue *rxq);
 };
 
 struct iavf_txq_ops {
@@ -221,59 +228,6 @@ struct iavf_rx_queue_stats {
 	struct iavf_ipsec_crypto_stats ipsec_crypto;
 };
 
-/* Structure associated with each Rx queue. */
-struct iavf_rx_queue {
-	struct rte_mempool *mp;       /* mbuf pool to populate Rx ring */
-	const struct rte_memzone *mz; /* memzone for Rx ring */
-	volatile union iavf_rx_desc *rx_ring; /* Rx ring virtual address */
-	uint64_t rx_ring_phys_addr;   /* Rx ring DMA address */
-	struct rte_mbuf **sw_ring;     /* address of SW ring */
-	uint16_t nb_rx_desc;          /* ring length */
-	uint16_t rx_tail;             /* current value of tail */
-	volatile uint8_t *qrx_tail;   /* register address of tail */
-	uint16_t rx_free_thresh;      /* max free RX desc to hold */
-	uint16_t nb_rx_hold;          /* number of held free RX desc */
-	struct rte_mbuf *pkt_first_seg; /* first segment of current packet */
-	struct rte_mbuf *pkt_last_seg;  /* last segment of current packet */
-	struct rte_mbuf fake_mbuf;      /* dummy mbuf */
-	uint8_t rxdid;
-	uint8_t rel_mbufs_type;
-
-	/* used for VPMD */
-	uint16_t rxrearm_nb;       /* number of remaining to be re-armed */
-	uint16_t rxrearm_start;    /* the idx we start the re-arming from */
-	uint64_t mbuf_initializer; /* value to init mbufs */
-
-	/* for rx bulk */
-	uint16_t rx_nb_avail;      /* number of staged packets ready */
-	uint16_t rx_next_avail;    /* index of next staged packets */
-	uint16_t rx_free_trigger;  /* triggers rx buffer allocation */
-	struct rte_mbuf *rx_stage[IAVF_RX_MAX_BURST * 2]; /* store mbuf */
-
-	uint16_t port_id;        /* device port ID */
-	uint8_t crc_len;        /* 0 if CRC stripped, 4 otherwise */
-	uint8_t fdir_enabled;   /* 0 if FDIR disabled, 1 when enabled */
-	uint16_t queue_id;      /* Rx queue index */
-	uint16_t rx_buf_len;    /* The packet buffer size */
-	uint16_t rx_hdr_len;    /* The header buffer size */
-	uint16_t max_pkt_len;   /* Maximum packet length */
-	struct iavf_vsi *vsi; /**< the VSI this queue belongs to */
-
-	bool q_set;             /* if rx queue has been configured */
-	bool rx_deferred_start; /* don't start this queue in dev start */
-	const struct iavf_rxq_ops *ops;
-	uint8_t rx_flags;
-#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG1     BIT(0)
-#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2   BIT(1)
-	uint8_t proto_xtr; /* protocol extraction type */
-	uint64_t xtr_ol_flag;
-		/* flexible descriptor metadata extraction offload flag */
-	struct iavf_rx_queue_stats *stats;
-	uint64_t offloads;
-	uint64_t phc_time;
-	uint64_t hw_time_update;
-};
-
 /* Offload features */
 union iavf_tx_offload {
 	uint64_t data;
@@ -691,7 +645,7 @@ uint16_t iavf_xmit_pkts_vec_avx2_offload(void *tx_queue, struct rte_mbuf **tx_pk
 int iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc);
 int iavf_rx_vec_dev_check(struct rte_eth_dev *dev);
 int iavf_tx_vec_dev_check(struct rte_eth_dev *dev);
-int iavf_rxq_vec_setup(struct iavf_rx_queue *rxq);
+int iavf_rxq_vec_setup(struct ci_rx_queue *rxq);
 int iavf_txq_vec_setup(struct ci_tx_queue *txq);
 uint16_t iavf_recv_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				   uint16_t nb_pkts);
@@ -731,12 +685,12 @@ uint8_t iavf_proto_xtr_type_to_rxdid(uint8_t xtr_type);
 
 void iavf_set_default_ptype_table(struct rte_eth_dev *dev);
 void iavf_tx_queue_release_mbufs_avx512(struct ci_tx_queue *txq);
-void iavf_rx_queue_release_mbufs_sse(struct iavf_rx_queue *rxq);
+void iavf_rx_queue_release_mbufs_sse(struct ci_rx_queue *rxq);
 void iavf_tx_queue_release_mbufs_sse(struct ci_tx_queue *txq);
-void iavf_rx_queue_release_mbufs_neon(struct iavf_rx_queue *rxq);
+void iavf_rx_queue_release_mbufs_neon(struct ci_rx_queue *rxq);
 
 static inline
-void iavf_dump_rx_descriptor(struct iavf_rx_queue *rxq,
+void iavf_dump_rx_descriptor(struct ci_rx_queue *rxq,
 			    const volatile void *desc,
 			    uint16_t rx_id)
 {
@@ -794,7 +748,7 @@ void iavf_dump_tx_descriptor(const struct ci_tx_queue *txq,
 #define FDIR_PROC_ENABLE_PER_QUEUE(ad, on) do { \
 	int i; \
 	for (i = 0; i < (ad)->dev_data->nb_rx_queues; i++) { \
-		struct iavf_rx_queue *rxq = (ad)->dev_data->rx_queues[i]; \
+		struct ci_rx_queue *rxq = (ad)->dev_data->rx_queues[i]; \
 		if (!rxq) \
 			continue; \
 		rxq->fdir_enabled = on; \
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
index 88e35dc3e9..f51fa4acf9 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
@@ -7,7 +7,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	iavf_rxq_rearm_common(rxq, false);
 }
@@ -15,21 +15,19 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 #define PKTLEN_SHIFT     10
 
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx2(struct ci_rx_queue *rxq,
 			     struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts, uint8_t *split_packet,
 			     bool offload)
 {
 #define IAVF_DESCS_PER_LOOP_AVX 8
 
-	/* const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; */
-	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *type_table = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	/* struct iavf_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail]; */
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union iavf_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union iavf_rx_desc *rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 
 	rte_prefetch0(rxdp);
@@ -487,14 +485,14 @@ flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct ci_rx_queue *rxq,
 				      struct rte_mbuf **rx_pkts,
 				      uint16_t nb_pkts, uint8_t *split_packet,
 				      bool offload)
 {
 #define IAVF_DESCS_PER_LOOP_AVX 8
 
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
@@ -503,9 +501,9 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
 	volatile union iavf_rx_flex_desc *rxdp =
-		(volatile union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+		(volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -1476,7 +1474,7 @@ static __rte_always_inline uint16_t
 iavf_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				   uint16_t nb_pkts, bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
@@ -1565,7 +1563,7 @@ iavf_recv_scattered_burst_vec_avx2_flex_rxd(void *rx_queue,
 					    struct rte_mbuf **rx_pkts,
 					    uint16_t nb_pkts, bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
index f2af028bef..80495f33cd 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
@@ -28,26 +28,26 @@
 #define IAVF_RX_TS_OFFLOAD
 
 static __rte_always_inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	iavf_rxq_rearm_common(rxq, true);
 }
 
 #define IAVF_RX_LEN_MASK 0x80808080
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx512(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx512(struct ci_rx_queue *rxq,
 			       struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts, uint8_t *split_packet,
 			       bool offload)
 {
 #ifdef IAVF_RX_PTYPE_OFFLOAD
-	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *type_table = rxq->iavf_vsi->adapter->ptype_tbl;
 #endif
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0, 0,
 						    rxq->mbuf_initializer);
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union iavf_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union iavf_rx_desc *rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -578,13 +578,13 @@ flex_rxd_to_fdir_flags_vec_avx512(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct ci_rx_queue *rxq,
 					struct rte_mbuf **rx_pkts,
 					uint16_t nb_pkts,
 					uint8_t *split_packet,
 					bool offload)
 {
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
 #endif
@@ -594,9 +594,9 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0, 0,
 						    rxq->mbuf_initializer);
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
 	volatile union iavf_rx_flex_desc *rxdp =
-		(volatile union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+		(volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -1653,7 +1653,7 @@ static __rte_always_inline uint16_t
 iavf_recv_scattered_burst_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts, bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
@@ -1729,7 +1729,7 @@ iavf_recv_scattered_burst_vec_avx512_flex_rxd(void *rx_queue,
 					      uint16_t nb_pkts,
 					      bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
index 38e9a206d9..f0a7d19b6a 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
@@ -8,7 +8,6 @@
 #include <ethdev_driver.h>
 #include <rte_malloc.h>
 
-#include "../common/rx.h"
 #include "iavf.h"
 #include "iavf_rxtx.h"
 
@@ -21,7 +20,7 @@ iavf_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
 }
 
 static inline void
-_iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
+_iavf_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	const unsigned int mask = rxq->nb_rx_desc - 1;
 	unsigned int i;
@@ -32,15 +31,15 @@ _iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
 	/* free all mbufs that are valid in the ring */
 	if (rxq->rxrearm_nb == 0) {
 		for (i = 0; i < rxq->nb_rx_desc; i++) {
-			if (rxq->sw_ring[i])
-				rte_pktmbuf_free_seg(rxq->sw_ring[i]);
+			if (rxq->sw_ring[i].mbuf)
+				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
 		}
 	} else {
 		for (i = rxq->rx_tail;
 		     i != rxq->rxrearm_start;
 		     i = (i + 1) & mask) {
-			if (rxq->sw_ring[i])
-				rte_pktmbuf_free_seg(rxq->sw_ring[i]);
+			if (rxq->sw_ring[i].mbuf)
+				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
 		}
 	}
 
@@ -51,7 +50,7 @@ _iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
 }
 
 static inline int
-iavf_rx_vec_queue_default(struct iavf_rx_queue *rxq)
+iavf_rx_vec_queue_default(struct ci_rx_queue *rxq)
 {
 	if (!rxq)
 		return -1;
@@ -117,7 +116,7 @@ static inline int
 iavf_rx_vec_dev_check_default(struct rte_eth_dev *dev)
 {
 	int i;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int ret;
 	int result = 0;
 
@@ -240,14 +239,14 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 
 #ifdef RTE_ARCH_X86
 static __rte_always_inline void
-iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
+iavf_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -259,7 +258,7 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 
 			dma_addr0 = _mm_setzero_si128();
 			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i] = &rxq->fake_mbuf;
+				rxp[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
@@ -278,8 +277,8 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
 		__m128i vaddr0, vaddr1;
 
-		mb0 = rxp[0];
-		mb1 = rxp[1];
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
 
 		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
 		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
index a583340f15..e1c8f3c7f9 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
@@ -15,7 +15,7 @@
 #include "iavf_rxtx_vec_common.h"
 
 static inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
@@ -75,7 +75,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-desc_to_olflags_v(struct iavf_rx_queue *rxq, volatile union iavf_rx_desc *rxdp,
+desc_to_olflags_v(struct ci_rx_queue *rxq, volatile union iavf_rx_desc *rxdp,
 		  uint64x2_t descs[4], struct rte_mbuf **rx_pkts)
 {
 	RTE_SET_USED(rxdp);
@@ -193,7 +193,7 @@ desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **__rte_restrict rx_pkts,
  * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct iavf_rx_queue *__rte_restrict rxq,
+_recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		   struct rte_mbuf **__rte_restrict rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
@@ -203,7 +203,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *__rte_restrict rxq,
 	struct rte_mbuf **sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	/* mask to shuffle from desc. to mbuf */
 	uint8x16_t shuf_msk = {
@@ -394,13 +394,13 @@ iavf_recv_pkts_vec(void *__rte_restrict rx_queue,
 }
 
 void __rte_cold
-iavf_rx_queue_release_mbufs_neon(struct iavf_rx_queue *rxq)
+iavf_rx_queue_release_mbufs_neon(struct ci_rx_queue *rxq)
 {
 	_iavf_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-iavf_rxq_vec_setup(struct iavf_rx_queue *rxq)
+iavf_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->rel_mbufs_type = IAVF_REL_MBUFS_NEON_VEC;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
index 2e41079e88..f18dfd636c 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
@@ -13,19 +13,19 @@
 #include <rte_vect.h>
 
 static inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
 	__m128i dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp, (void *)rxp,
@@ -33,7 +33,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 		if (rxq->rxrearm_nb + rxq->rx_free_thresh >= rxq->nb_rx_desc) {
 			dma_addr0 = _mm_setzero_si128();
 			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i] = &rxq->fake_mbuf;
+				rxp[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
@@ -47,8 +47,8 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 	for (i = 0; i < rxq->rx_free_thresh; i += 2, rxp += 2) {
 		__m128i vaddr0, vaddr1;
 
-		mb0 = rxp[0];
-		mb1 = rxp[1];
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
 
 		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
 		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
@@ -88,7 +88,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4],
 		  struct rte_mbuf **rx_pkts)
 {
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -206,11 +206,11 @@ flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 static inline void
-flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4], __m128i descs_bh[4],
+flex_desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4], __m128i descs_bh[4],
 		       struct rte_mbuf **rx_pkts)
 #else
 static inline void
-flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+flex_desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4],
 		       struct rte_mbuf **rx_pkts)
 #endif
 {
@@ -466,16 +466,16 @@ flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
 	__m128i shuf_msk;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	__m128i crc_adjust = _mm_set_epi16(
 				0, 0, 0,    /* ignore non-length fields */
@@ -500,7 +500,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -571,7 +571,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 #endif
 
 		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
-		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos].mbuf);
 		/* Read desc statuses backwards to avoid race condition */
 		/* A.1 load desc[3] */
 		descs[3] = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, rxdp + 3));
@@ -714,16 +714,16 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
+_recv_raw_pkts_vec_flex_rxd(struct ci_rx_queue *rxq,
 			    struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union iavf_rx_flex_desc *rxdp;
-	struct rte_mbuf **sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
 #endif
@@ -779,7 +779,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = (volatile union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+	rxdp = (volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -857,7 +857,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 #endif
 
 		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
-		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos].mbuf);
 		/* Read desc statuses backwards to avoid race condition */
 		/* A.1 load desc[3] */
 		descs[3] = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, rxdp + 3));
@@ -1207,7 +1207,7 @@ static uint16_t
 iavf_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 	unsigned int i = 0;
 
@@ -1276,7 +1276,7 @@ iavf_recv_scattered_burst_vec_flex_rxd(void *rx_queue,
 				       struct rte_mbuf **rx_pkts,
 				       uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 	unsigned int i = 0;
 
@@ -1449,7 +1449,7 @@ iavf_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-iavf_rx_queue_release_mbufs_sse(struct iavf_rx_queue *rxq)
+iavf_rx_queue_release_mbufs_sse(struct ci_rx_queue *rxq)
 {
 	_iavf_rx_queue_release_mbufs_vec(rxq);
 }
@@ -1462,7 +1462,7 @@ iavf_txq_vec_setup(struct ci_tx_queue *txq)
 }
 
 int __rte_cold
-iavf_rxq_vec_setup(struct iavf_rx_queue *rxq)
+iavf_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->rel_mbufs_type = IAVF_REL_MBUFS_SSE_VEC;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
diff --git a/drivers/net/intel/iavf/iavf_vchnl.c b/drivers/net/intel/iavf/iavf_vchnl.c
index 6feca8435e..9f8bb07726 100644
--- a/drivers/net/intel/iavf/iavf_vchnl.c
+++ b/drivers/net/intel/iavf/iavf_vchnl.c
@@ -1218,7 +1218,7 @@ int
 iavf_configure_queues(struct iavf_adapter *adapter,
 		uint16_t num_queue_pairs, uint16_t index)
 {
-	struct iavf_rx_queue **rxq = (struct iavf_rx_queue **)adapter->dev_data->rx_queues;
+	struct ci_rx_queue **rxq = (struct ci_rx_queue **)adapter->dev_data->rx_queues;
 	struct ci_tx_queue **txq = (struct ci_tx_queue **)adapter->dev_data->tx_queues;
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
 	struct virtchnl_vsi_queue_config_info *vc_config;
@@ -2258,9 +2258,9 @@ iavf_get_ptp_cap(struct iavf_adapter *adapter)
 }
 
 int
-iavf_get_phc_time(struct iavf_rx_queue *rxq)
+iavf_get_phc_time(struct ci_rx_queue *rxq)
 {
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
 	struct virtchnl_phc_time phc_time;
 	struct iavf_cmd_info args;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v2 07/13] net/intel: generalize vectorized Rx rearm
  2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (4 preceding siblings ...)
  2025-05-12 10:58   ` [PATCH v2 06/13] net/iavf: " Anatoly Burakov
@ 2025-05-12 10:58   ` Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
                     ` (5 subsequent siblings)
  11 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 10:58 UTC (permalink / raw)
  To: dev, Bruce Richardson

There is a certain amount of duplication between various drivers when it
comes to Rx ring rearm. This patch takes the implementation from the ice
driver as a base, because it supports operation without IOVA in mbuf as
well as all vector implementations, and moves it to a common file.

The driver Rx rearm code used copious amounts of #ifdef-ery to
discriminate between 16- and 32-byte descriptor support, but we cannot do
that in the common code because we will not have access to those
definitions. Instead, we rely on compile-time constant propagation and
force-inlining to ensure that the compiler generates effectively the same
code it generated when the implementation lived in the driver. We also
add a compile-time definition for the vectorization level of the x86
vector instructions used, to discriminate between different instruction
sets. This too is constant-propagated, and thus should not affect
performance.
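
To illustrate the mechanism outside of the driver context, here is a
minimal standalone sketch (not part of this patch; names and output are
illustrative only) of how a force-inlined helper taking a compile-time
constant collapses to a single code path at every call site, which is
the same effect the old #ifdef blocks had:

  #include <stddef.h>
  #include <stdio.h>

  /* 'desc_len' is a literal constant at every call site, so after
   * inlining the compiler propagates it and removes the dead branch,
   * matching what the old #ifdef-selected code produced. */
  static inline __attribute__((always_inline)) void
  example_rearm(const size_t desc_len)
  {
          if (desc_len == 16)
                  puts("16-byte descriptor rearm path");
          else
                  puts("32-byte descriptor rearm path");
  }

  int main(void)
  {
          example_rearm(16); /* e.g. sizeof() of a 16-byte descriptor */
          example_rearm(32); /* e.g. sizeof() of a 32-byte descriptor */
          return 0;
  }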

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h               |   3 +
 drivers/net/intel/common/rx_vec_sse.h       | 323 ++++++++++++++++++++
 drivers/net/intel/ice/ice_rxtx.h            |   2 +-
 drivers/net/intel/ice/ice_rxtx_common_avx.h | 233 --------------
 drivers/net/intel/ice/ice_rxtx_vec_avx2.c   |   5 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx512.c |   5 +-
 drivers/net/intel/ice/ice_rxtx_vec_sse.c    |  77 +----
 7 files changed, 336 insertions(+), 312 deletions(-)
 create mode 100644 drivers/net/intel/common/rx_vec_sse.h
 delete mode 100644 drivers/net/intel/ice/ice_rxtx_common_avx.h

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index 2d9328ae89..65e920fdd1 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -14,6 +14,8 @@
 #define CI_RX_BURST 32
 #define CI_RX_MAX_BURST 32
 #define CI_RX_MAX_NSEG 2
+#define CI_VPMD_DESCS_PER_LOOP 4
+#define CI_VPMD_RX_REARM_THRESH 64
 
 struct ci_rx_queue;
 
@@ -40,6 +42,7 @@ struct ci_rx_queue {
 		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
 		volatile union iavf_16byte_rx_desc *iavf_rx_16b_ring;
 		volatile union iavf_32byte_rx_desc *iavf_rx_32b_ring;
+		volatile void *rx_ring; /**< Generic */
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
diff --git a/drivers/net/intel/common/rx_vec_sse.h b/drivers/net/intel/common/rx_vec_sse.h
new file mode 100644
index 0000000000..6fe0baf38b
--- /dev/null
+++ b/drivers/net/intel/common/rx_vec_sse.h
@@ -0,0 +1,323 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 Intel Corporation
+ */
+
+#ifndef _COMMON_INTEL_RX_VEC_SSE_H_
+#define _COMMON_INTEL_RX_VEC_SSE_H_
+
+#include <stdint.h>
+
+#include <ethdev_driver.h>
+#include <rte_io.h>
+
+#include "rx.h"
+
+enum ci_rx_vec_level {
+	CI_RX_VEC_LEVEL_SSE = 0,
+	CI_RX_VEC_LEVEL_AVX2,
+	CI_RX_VEC_LEVEL_AVX512,
+};
+
+static inline int
+_ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	volatile void *rxdp;
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void **)rxp,
+				 rearm_thresh) < 0) {
+		if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) {
+				rxp[i].mbuf = &rxq->fake_mbuf;
+				const void *ptr = RTE_PTR_ADD(rxdp, i * desc_len);
+				_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr),
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += rearm_thresh;
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * SSE code path can handle both 16-byte and 32-byte descriptors with one code
+ * path, as we only ever write 16 bytes at a time.
+ */
+static __rte_always_inline void
+_ci_rxq_rearm_sse(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	const __m128i hdr_room = _mm_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+	const __m128i zero = _mm_setzero_si128();
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile void *rxdp;
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp = RTE_PTR_ADD(rxdp, 2 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len);
+		__m128i vaddr0, vaddr1;
+		__m128i dma_addr0, dma_addr1;
+		struct rte_mbuf *mb0, *mb1;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* add headroom to address values */
+		vaddr0 = _mm_add_epi64(vaddr0, hdr_room);
+		vaddr1 = _mm_add_epi64(vaddr1, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+		/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, zero);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, zero);
+#else
+		/* erase Header Buffer Address */
+		dma_addr0 = _mm_unpacklo_epi64(vaddr0, zero);
+		dma_addr1 = _mm_unpacklo_epi64(vaddr1, zero);
+#endif
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr0), dma_addr0);
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr1), dma_addr1);
+	}
+}
+
+#ifdef __AVX2__
+/* AVX2 version for 16-byte descriptors, handles 4 buffers at a time */
+static __rte_always_inline void
+_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	const size_t desc_len = 16;
+	volatile void *rxdp;
+	const __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+	const __m256i zero = _mm256_setzero_si256();
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 4, rxp += 4, rxdp = RTE_PTR_ADD(rxdp, 4 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 2);
+		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+		__m256i vaddr0_1, vaddr2_3;
+		__m256i dma_addr0_1, dma_addr2_3;
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+		mb2 = rxp[2].mbuf;
+		mb3 = rxp[3].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+		/**
+		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+		 * into the high lanes. Similarly for 2 & 3
+		 */
+		vaddr0_1 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+						vaddr1, 1);
+		vaddr2_3 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+						vaddr3, 1);
+
+		/* add headroom to address values */
+		vaddr0_1 = _mm256_add_epi64(vaddr0_1, hdr_room);
+		vaddr2_3 = _mm256_add_epi64(vaddr2_3, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, zero);
+		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, zero);
+#else
+		/* erase Header Buffer Address */
+		dma_addr0_1 = _mm256_unpacklo_epi64(vaddr0_1, zero);
+		dma_addr2_3 = _mm256_unpacklo_epi64(vaddr2_3, zero);
+#endif
+
+		/* flush desc with pa dma_addr */
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr0), dma_addr0_1);
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr1), dma_addr2_3);
+	}
+}
+#endif /* __AVX2__ */
+
+#ifdef __AVX512VL__
+/* AVX512 version for 16-byte descriptors, handles 8 buffers at a time */
+static __rte_always_inline void
+_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	const size_t desc_len = 16;
+	volatile void *rxdp;
+	int i;
+	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+	struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+	__m512i dma_addr0_3, dma_addr4_7;
+	__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+	__m512i zero = _mm512_setzero_si512();
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 8, rxp += 8, rxdp = RTE_PTR_ADD(rxdp, 8 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 4);
+		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+		__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+		__m256i vaddr0_1, vaddr2_3;
+		__m256i vaddr4_5, vaddr6_7;
+		__m512i vaddr0_3, vaddr4_7;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+		mb2 = rxp[2].mbuf;
+		mb3 = rxp[3].mbuf;
+		mb4 = rxp[4].mbuf;
+		mb5 = rxp[5].mbuf;
+		mb6 = rxp[6].mbuf;
+		mb7 = rxp[7].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+		vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+		vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+		vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+		vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+		/**
+		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+		 * into the high lanes. Similarly for 2 & 3, and so on.
+		 */
+		vaddr0_1 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+						vaddr1, 1);
+		vaddr2_3 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+						vaddr3, 1);
+		vaddr4_5 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+						vaddr5, 1);
+		vaddr6_7 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+						vaddr7, 1);
+		vaddr0_3 =
+			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+						vaddr2_3, 1);
+		vaddr4_7 =
+			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+						vaddr6_7, 1);
+
+		/* add headroom to address values */
+		vaddr0_3 = _mm512_add_epi64(vaddr0_3, hdr_room);
+		vaddr4_7 = _mm512_add_epi64(vaddr4_7, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+		dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, zero);
+		dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, zero);
+#else
+		/* erase Header Buffer Address */
+		dma_addr0_3 = _mm512_unpacklo_epi64(vaddr0_3, zero);
+		dma_addr4_7 = _mm512_unpacklo_epi64(vaddr4_7, zero);
+#endif
+
+		/* flush desc with pa dma_addr */
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr0), dma_addr0_3);
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr1), dma_addr4_7);
+	}
+}
+#endif /* __AVX512VL__ */
+
+static __rte_always_inline void
+ci_rxq_rearm(struct ci_rx_queue *rxq, const size_t desc_len,
+		const enum ci_rx_vec_level vec_level)
+{
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	uint16_t rx_id;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (_ci_rxq_rearm_get_bufs(rxq, desc_len) < 0)
+		return;
+
+	if (desc_len == 16) {
+		switch (vec_level) {
+		case CI_RX_VEC_LEVEL_AVX512:
+#ifdef __AVX512VL__
+			_ci_rxq_rearm_avx512(rxq);
+			break;
+#else
+			/* fall back to AVX2 if AVX512 is not available at compile time */
+			/* fall through */
+#endif
+		case CI_RX_VEC_LEVEL_AVX2:
+#ifdef __AVX2__
+			_ci_rxq_rearm_avx2(rxq);
+			break;
+#else
+			/* fall back to SSE if AVX2 isn't supported */
+			/* fall through */
+#endif
+		case CI_RX_VEC_LEVEL_SSE:
+			_ci_rxq_rearm_sse(rxq, desc_len);
+			break;
+		}
+	} else {
+		/* for 32-byte descriptors, only the SSE path is supported */
+		_ci_rxq_rearm_sse(rxq, desc_len);
+	}
+
+	rxq->rxrearm_start += rearm_thresh;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= rearm_thresh;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif /* _COMMON_INTEL_RX_VEC_SSE_H_ */
diff --git a/drivers/net/intel/ice/ice_rxtx.h b/drivers/net/intel/ice/ice_rxtx.h
index 1a39770d7d..72d0972587 100644
--- a/drivers/net/intel/ice/ice_rxtx.h
+++ b/drivers/net/intel/ice/ice_rxtx.h
@@ -43,7 +43,7 @@
 
 #define ICE_VPMD_RX_BURST           32
 #define ICE_VPMD_TX_BURST           32
-#define ICE_RXQ_REARM_THRESH        64
+#define ICE_RXQ_REARM_THRESH        CI_VPMD_RX_REARM_THRESH
 #define ICE_MAX_RX_BURST            ICE_RXQ_REARM_THRESH
 #define ICE_TX_MAX_FREE_BUF_SZ      64
 #define ICE_DESCS_PER_LOOP          4
diff --git a/drivers/net/intel/ice/ice_rxtx_common_avx.h b/drivers/net/intel/ice/ice_rxtx_common_avx.h
deleted file mode 100644
index 7209c902db..0000000000
--- a/drivers/net/intel/ice/ice_rxtx_common_avx.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2019 Intel Corporation
- */
-
-#ifndef _ICE_RXTX_COMMON_AVX_H_
-#define _ICE_RXTX_COMMON_AVX_H_
-
-#include "ice_rxtx.h"
-
-#ifdef __AVX2__
-static __rte_always_inline void
-ice_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 ICE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-#if RTE_IOVA_IN_MBUF
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-#else
-		/* convert va to dma_addr hdr/data */
-		dma_addr0 = _mm_unpacklo_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpacklo_epi64(vaddr1, vaddr1);
-#endif
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-#else
-#ifdef __AVX512VL__
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < ICE_RXQ_REARM_THRESH;
-				i += 8, rxep += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-			mb4 = rxep[4].mbuf;
-			mb5 = rxep[5].mbuf;
-			mb6 = rxep[6].mbuf;
-			mb7 = rxep[7].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-						   vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-						   vaddr6_7, 1);
-
-#if RTE_IOVA_IN_MBUF
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-#else
-			/* convert va to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpacklo_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpacklo_epi64(vaddr4_7, vaddr4_7);
-#endif
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *, &rxdp->read), dma_addr0_3);
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *, &(rxdp + 4)->read), dma_addr4_7);
-		}
-	} else
-#endif /* __AVX512VL__ */
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < ICE_RXQ_REARM_THRESH;
-				i += 4, rxep += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-
-#if RTE_IOVA_IN_MBUF
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-#else
-			/* convert va to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpacklo_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpacklo_epi64(vaddr2_3, vaddr2_3);
-#endif
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *, &rxdp->read), dma_addr0_1);
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *, &(rxdp + 2)->read), dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif /* __AVX2__ */
-
-#endif /* _ICE_RXTX_COMMON_AVX_H_ */
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
index f4555369a2..5ca3f92482 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
@@ -3,14 +3,15 @@
  */
 
 #include "ice_rxtx_vec_common.h"
-#include "ice_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
 static __rte_always_inline void
 ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	ice_rxq_rearm_common(rxq, false);
+	ci_rxq_rearm(rxq, sizeof(union ice_rx_flex_desc), CI_RX_VEC_LEVEL_AVX2);
 }
 
 static __rte_always_inline __m256i
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
index 6eea74d703..883ea97c07 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
@@ -3,7 +3,8 @@
  */
 
 #include "ice_rxtx_vec_common.h"
-#include "ice_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
@@ -12,7 +13,7 @@
 static __rte_always_inline void
 ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	ice_rxq_rearm_common(rxq, true);
+	ci_rxq_rearm(rxq, sizeof(union ice_rx_flex_desc), CI_RX_VEC_LEVEL_AVX512);
 }
 
 static inline __m256i
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_sse.c b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
index dc9d37226a..fa0c7e8829 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
@@ -4,6 +4,8 @@
 
 #include "ice_rxtx_vec_common.h"
 
+#include "../common/rx_vec_sse.h"
+
 #include <rte_vect.h>
 
 static inline __m128i
@@ -28,80 +30,7 @@ ice_flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 static inline void
 ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-					  RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 ICE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				 offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-#if RTE_IOVA_IN_MBUF
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-#else
-		/* convert va to dma_addr hdr/data */
-		dma_addr0 = _mm_unpacklo_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpacklo_epi64(vaddr1, vaddr1);
-#endif
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union ice_rx_flex_desc), CI_RX_VEC_LEVEL_SSE);
 }
 
 static inline void
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v2 08/13] net/i40e: use common Rx rearm code
  2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (5 preceding siblings ...)
  2025-05-12 10:58   ` [PATCH v2 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
@ 2025-05-12 10:58   ` Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 09/13] net/iavf: " Anatoly Burakov
                     ` (4 subsequent siblings)
  11 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 10:58 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes

The i40e driver has an implementation of vectorized mbuf rearm code that
is identical to the one in the common code, so just use that.

In addition, the i40e driver has an implementation of Rx queue rearm for
the NEON instruction set, so create a common header for NEON
implementations too, and use it in the i40e NEON code.
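
As a sketch of the end result (an illustration, not a quote from the
patch; the exact descriptor type name is an assumption), the per-driver
NEON rearm function reduces to a one-line call into the common helper:

  static inline void
  i40e_rxq_rearm(struct ci_rx_queue *rxq)
  {
          /* the common helper derives the ring layout from the
           * descriptor size passed at the call site */
          ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc));
  }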

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v2:
    - Fix compile issues on Arm64

 drivers/net/intel/common/rx_vec_neon.h        | 131 +++++++++++
 drivers/net/intel/i40e/i40e_rxtx.h            |   2 +-
 drivers/net/intel/i40e/i40e_rxtx_common_avx.h | 215 ------------------
 drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |   5 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |   5 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   |  59 +----
 drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    |  70 +-----
 7 files changed, 144 insertions(+), 343 deletions(-)
 create mode 100644 drivers/net/intel/common/rx_vec_neon.h
 delete mode 100644 drivers/net/intel/i40e/i40e_rxtx_common_avx.h

diff --git a/drivers/net/intel/common/rx_vec_neon.h b/drivers/net/intel/common/rx_vec_neon.h
new file mode 100644
index 0000000000..d79802b1c0
--- /dev/null
+++ b/drivers/net/intel/common/rx_vec_neon.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 Intel Corporation
+ */
+
+#ifndef _COMMON_INTEL_RX_VEC_NEON_H_
+#define _COMMON_INTEL_RX_VEC_NEON_H_
+
+#include <stdint.h>
+
+#include <ethdev_driver.h>
+#include <rte_io.h>
+#include <rte_vect.h>
+
+#include "rx.h"
+
+static inline int
+_ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	volatile void *rxdp;
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void **)rxp,
+				 rearm_thresh) < 0) {
+		if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) {
+			uint64x2_t zero = vdupq_n_u64(0);
+
+			for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) {
+				rxp[i].mbuf = &rxq->fake_mbuf;
+				const void *ptr = RTE_PTR_ADD(rxdp, i * desc_len);
+				vst1q_u64(RTE_CAST_PTR(uint64_t *, ptr), zero);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += rearm_thresh;
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * The Neon code path can handle both 16-byte and 32-byte descriptors with one
+ * code path, as we only ever write 16 bytes at a time.
+ */
+static __rte_always_inline void
+_ci_rxq_rearm_neon(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	const uint64x2_t zero = vdupq_n_u64(0);
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile void *rxdp;
+	int i;
+
+	const uint8x8_t mbuf_init = vld1_u8((uint8_t *)&rxq->mbuf_initializer);
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp = RTE_PTR_ADD(rxdp, 2 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len);
+		uint64_t addr0, addr1;
+		uint64x2_t dma_addr0, dma_addr1;
+		struct rte_mbuf *mb0, *mb1;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/*
+		 * Flush mbuf with pkt template.
+		 * Data to be rearmed is 6 bytes long.
+		 */
+		vst1_u8((uint8_t *)&mb0->rearm_data, mbuf_init);
+		addr0 = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
+		dma_addr0 = vsetq_lane_u64(addr0, zero, 0);
+		/* flush desc with pa dma_addr */
+		vst1q_u64(RTE_CAST_PTR(uint64_t *, ptr0), dma_addr0);
+
+		vst1_u8((uint8_t *)&mb1->rearm_data, mbuf_init);
+		addr1 = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
+		dma_addr1 = vsetq_lane_u64(addr1, zero, 0);
+		vst1q_u64(RTE_CAST_PTR(uint64_t *, ptr1), dma_addr1);
+#else
+		/*
+		 * Flush mbuf with pkt template.
+		 * Data to be rearmed is 6 bytes long.
+		 */
+		vst1_u8((uint8_t *)&mb0->rearm_data, mbuf_init);
+		addr0 = (uintptr_t)RTE_PTR_ADD(mb0->buf_addr, RTE_PKTMBUF_HEADROOM);
+		dma_addr0 = vsetq_lane_u64(addr0, zero, 0);
+		/* flush desc with pa dma_addr */
+		vst1q_u64(RTE_CAST_PTR(uint64_t *, ptr0), dma_addr0);
+
+		vst1_u8((uint8_t *)&mb1->rearm_data, mbuf_init);
+		addr1 = (uintptr_t)RTE_PTR_ADD(mb1->buf_addr, RTE_PKTMBUF_HEADROOM);
+		dma_addr1 = vsetq_lane_u64(addr1, zero, 0);
+		vst1q_u64(RTE_CAST_PTR(uint64_t *, ptr1), dma_addr1);
+#endif
+	}
+}
+
+static __rte_always_inline void
+ci_rxq_rearm(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	uint16_t rx_id;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (_ci_rxq_rearm_get_bufs(rxq, desc_len) < 0)
+		return;
+
+	_ci_rxq_rearm_neon(rxq, desc_len);
+
+	rxq->rxrearm_start += rearm_thresh;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= rearm_thresh;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif /* _COMMON_INTEL_RX_VEC_NEON_H_ */
diff --git a/drivers/net/intel/i40e/i40e_rxtx.h b/drivers/net/intel/i40e/i40e_rxtx.h
index 4b5a84d8ef..8a41db2df3 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx.h
@@ -13,7 +13,7 @@
 
 #define RTE_I40E_VPMD_RX_BURST        32
 #define RTE_I40E_VPMD_TX_BURST        32
-#define RTE_I40E_RXQ_REARM_THRESH      32
+#define RTE_I40E_RXQ_REARM_THRESH      CI_VPMD_RX_REARM_THRESH
 #define RTE_I40E_MAX_RX_BURST          RTE_I40E_RXQ_REARM_THRESH
 #define RTE_I40E_TX_MAX_FREE_BUF_SZ    64
 #define RTE_I40E_DESCS_PER_LOOP    4
diff --git a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
deleted file mode 100644
index fd9447014b..0000000000
--- a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
+++ /dev/null
@@ -1,215 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2015 Intel Corporation
- */
-
-#ifndef _I40E_RXTX_COMMON_AVX_H_
-#define _I40E_RXTX_COMMON_AVX_H_
-#include <stdint.h>
-#include <ethdev_driver.h>
-#include <rte_malloc.h>
-
-#include "i40e_ethdev.h"
-#include "i40e_rxtx.h"
-
-#ifdef __AVX2__
-static __rte_always_inline void
-i40e_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-#else
-#ifdef __AVX512VL__
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
-				i += 8, rxep += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-			mb4 = rxep[4].mbuf;
-			mb5 = rxep[5].mbuf;
-			mb6 = rxep[6].mbuf;
-			mb7 = rxep[7].mbuf;
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-						   vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-						   vaddr6_7, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *,
-					&rxdp->read), dma_addr0_3);
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *,
-					&(rxdp + 4)->read), dma_addr4_7);
-		}
-	} else
-#endif /* __AVX512VL__*/
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
-				i += 4, rxep += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 = _mm256_inserti128_si256
-				(_mm256_castsi128_si256(vaddr0), vaddr1, 1);
-			vaddr2_3 = _mm256_inserti128_si256
-				(_mm256_castsi128_si256(vaddr2), vaddr3, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *,
-					&rxdp->read), dma_addr0_1);
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *,
-					&(rxdp + 2)->read), dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif /* __AVX2__*/
-
-#endif /*_I40E_RXTX_COMMON_AVX_H_*/
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index 0f3f7430aa..260b7d700a 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -11,14 +11,15 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
-#include "i40e_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
 static __rte_always_inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	i40e_rxq_rearm_common(rxq, false);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc), CI_RX_VEC_LEVEL_AVX2);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index f2292b45e8..be004e9f4f 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -11,7 +11,8 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
-#include "i40e_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
@@ -20,7 +21,7 @@
 static __rte_always_inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	i40e_rxq_rearm_common(rxq, true);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc), CI_RX_VEC_LEVEL_AVX512);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index 814aa666dc..6c21546471 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -16,65 +16,12 @@
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
 
+#include "../common/rx_vec_neon.h"
+
 static inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	uint64x2_t dma_addr0, dma_addr1;
-	uint64x2_t zero = vdupq_n_u64(0);
-	uint64_t paddr;
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  RTE_I40E_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read), zero);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr0 = vdupq_n_u64(paddr);
-
-		/* flush desc with pa dma_addr */
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr0);
-
-		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr1 = vdupq_n_u64(paddr);
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
-	rte_io_wmb();
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc));
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 74cd59e245..432177d499 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -12,78 +12,14 @@
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
 
+#include "../common/rx_vec_sse.h"
+
 #include <rte_vect.h>
 
 static inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc), CI_RX_VEC_LEVEL_SSE);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v2 09/13] net/iavf: use common Rx rearm code
  2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (6 preceding siblings ...)
  2025-05-12 10:58   ` [PATCH v2 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
@ 2025-05-12 10:58   ` Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 10/13] net/ixgbe: " Anatoly Burakov
                     ` (3 subsequent siblings)
  11 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 10:58 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin, Ian Stokes; +Cc: bruce.richardson

The iavf driver has implementations of vectorized mbuf rearm code that
are identical to the ones in the common code, so just use those.
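
As elsewhere in this series, each of the x86 vector flavours' rearm routines
becomes a one-line call into the common helper, with the instruction-set level
passed explicitly (the Neon flavour uses the Neon common header instead); a
minimal sketch for the AVX2 flavour, with enum values as introduced earlier in
the series:

    static __rte_always_inline void
    iavf_rxq_rearm(struct ci_rx_queue *rxq)
    {
            /* 32-byte iavf descriptors; the level selects the widest usable path */
            ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc), CI_RX_VEC_LEVEL_AVX2);
    }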

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/iavf/iavf_rxtx.h            |   4 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   |   3 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c |   3 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_common.h | 199 ------------------
 drivers/net/intel/iavf/iavf_rxtx_vec_neon.c   |  58 +----
 drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    |  72 +------
 6 files changed, 11 insertions(+), 328 deletions(-)

diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index c43ddc3c2f..d70250bf85 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -25,8 +25,8 @@
 /* used for Vector PMD */
 #define IAVF_VPMD_RX_MAX_BURST    32
 #define IAVF_VPMD_TX_MAX_BURST    32
-#define IAVF_RXQ_REARM_THRESH     32
-#define IAVF_VPMD_DESCS_PER_LOOP  4
+#define IAVF_RXQ_REARM_THRESH     CI_VPMD_RX_REARM_THRESH
+#define IAVF_VPMD_DESCS_PER_LOOP  CI_VPMD_DESCS_PER_LOOP
 #define IAVF_VPMD_TX_MAX_FREE_BUF 64
 
 #define IAVF_TX_NO_VECTOR_FLAGS (				 \
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
index f51fa4acf9..496c7abc42 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
@@ -2,6 +2,7 @@
  * Copyright(c) 2019 Intel Corporation
  */
 
+#include "../common/rx_vec_sse.h"
 #include "iavf_rxtx_vec_common.h"
 
 #include <rte_vect.h>
@@ -9,7 +10,7 @@
 static __rte_always_inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	iavf_rxq_rearm_common(rxq, false);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc), CI_RX_VEC_LEVEL_AVX2);
 }
 
 #define PKTLEN_SHIFT     10
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
index 80495f33cd..e7cd2b7c89 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
@@ -2,6 +2,7 @@
  * Copyright(c) 2020 Intel Corporation
  */
 
+#include "../common/rx_vec_sse.h"
 #include "iavf_rxtx_vec_common.h"
 
 #include <rte_vect.h>
@@ -30,7 +31,7 @@
 static __rte_always_inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	iavf_rxq_rearm_common(rxq, true);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc), CI_RX_VEC_LEVEL_AVX512);
 }
 
 #define IAVF_RX_LEN_MASK 0x80808080
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
index f0a7d19b6a..50228eb112 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
@@ -237,203 +237,4 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 	*txd_hi |= ((uint64_t)td_cmd) << IAVF_TXD_QW1_CMD_SHIFT;
 }
 
-#ifdef RTE_ARCH_X86
-static __rte_always_inline void
-iavf_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxp,
-				 IAVF_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IAVF_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-#else
-#ifdef CC_AVX512_SUPPORT
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
-				i += 8, rxp += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxp[0];
-			mb1 = rxp[1];
-			mb2 = rxp[2];
-			mb3 = rxp[3];
-			mb4 = rxp[4];
-			mb5 = rxp[5];
-			mb6 = rxp[6];
-			mb7 = rxp[7];
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-							vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-							vaddr6_7, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
-			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
-		}
-	} else
-#endif
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
-				i += 4, rxp += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxp[0];
-			mb1 = rxp[1];
-			mb2 = rxp[2];
-			mb3 = rxp[3];
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif
-
 #endif
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
index e1c8f3c7f9..490028c68a 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
@@ -14,64 +14,12 @@
 #include "iavf_rxtx.h"
 #include "iavf_rxtx_vec_common.h"
 
+#include "../common/rx_vec_neon.h"
+
 static inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	uint64x2_t dma_addr0, dma_addr1;
-	uint64x2_t zero = vdupq_n_u64(0);
-	uint64_t paddr;
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  IAVF_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxep[i] = &rxq->fake_mbuf;
-				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read), zero);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IAVF_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0];
-		mb1 = rxep[1];
-
-		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr0 = vdupq_n_u64(paddr);
-
-		/* flush desc with pa dma_addr */
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr0);
-
-		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr1 = vdupq_n_u64(paddr);
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	rte_io_wmb();
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc));
 }
 
 static inline void
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
index f18dfd636c..3f0ca6cf8e 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
@@ -9,82 +9,14 @@
 #include "iavf.h"
 #include "iavf_rxtx.h"
 #include "iavf_rxtx_vec_common.h"
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
 static inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-
-	volatile union iavf_rx_desc *rxdp;
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp, (void *)rxp,
-				 rxq->rx_free_thresh) < 0) {
-		if (rxq->rxrearm_nb + rxq->rx_free_thresh >= rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			rxq->rx_free_thresh;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < rxq->rx_free_thresh; i += 2, rxp += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += rxq->rx_free_thresh;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= rxq->rx_free_thresh;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-		   "rearm_start=%u rearm_nb=%u",
-		   rxq->port_id, rxq->queue_id,
-		   rx_id, rxq->rxrearm_start, rxq->rxrearm_nb);
-
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc), CI_RX_VEC_LEVEL_SSE);
 }
 
 static inline void
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v2 10/13] net/ixgbe: use common Rx rearm code
  2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (7 preceding siblings ...)
  2025-05-12 10:58   ` [PATCH v2 09/13] net/iavf: " Anatoly Burakov
@ 2025-05-12 10:58   ` Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 11/13] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
                     ` (2 subsequent siblings)
  11 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 10:58 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin; +Cc: bruce.richardson

The ixgbe driver has implementations of vectorized mbuf rearm code that
are identical to the ones in the common code, so just use those.
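
One subtlety: the old ixgbe SSE code zeroed the Header Buffer Address with an
explicit mask, whereas the common helper gets the same result by unpacking the
address register against zero. A minimal sketch of the equivalence (intrinsics
only; vaddr_plus_headroom is an illustrative name for the loaded mbuf address
with headroom already added):

    /* old ixgbe code: keep packet address (low qword), zero header address */
    dma = _mm_and_si128(dma, _mm_set_epi64x(0, UINT64_MAX));

    /* common helper: low qword = IOVA + headroom, high qword = 0 */
    reg = _mm_unpackhi_epi64(vaddr_plus_headroom, _mm_setzero_si128());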

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/ixgbe/ixgbe_rxtx.h          |  2 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c | 66 +---------------
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c  | 75 +------------------
 3 files changed, 7 insertions(+), 136 deletions(-)

diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index 84e28eb254..f3dd32b9ff 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -37,7 +37,7 @@
 #define RTE_IXGBE_DESCS_PER_LOOP    4
 
 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
-#define RTE_IXGBE_RXQ_REARM_THRESH      32
+#define RTE_IXGBE_RXQ_REARM_THRESH      CI_VPMD_RX_REARM_THRESH
 #define RTE_IXGBE_MAX_RX_BURST          RTE_IXGBE_RXQ_REARM_THRESH
 #endif
 
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
index 630a2e6a1d..0842f213ef 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -11,72 +11,12 @@
 #include "ixgbe_rxtx.h"
 #include "ixgbe_rxtx_vec_common.h"
 
+#include "../common/rx_vec_neon.h"
+
 static inline void
 ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	uint64x2_t dma_addr0, dma_addr1;
-	uint64x2_t zero = vdupq_n_u64(0);
-	uint64_t paddr;
-	uint8x8_t p;
-
-	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read),
-					  zero);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_IXGBE_RXQ_REARM_THRESH;
-		return;
-	}
-
-	p = vld1_u8((uint8_t *)&rxq->mbuf_initializer);
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/*
-		 * Flush mbuf with pkt template.
-		 * Data to be rearmed is 6 bytes long.
-		 */
-		vst1_u8((uint8_t *)&mb0->rearm_data, p);
-		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr0 = vsetq_lane_u64(paddr, zero, 0);
-		/* flush desc with pa dma_addr */
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr0);
-
-		vst1_u8((uint8_t *)&mb1->rearm_data, p);
-		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr1 = vsetq_lane_u64(paddr, zero, 0);
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union ixgbe_adv_rx_desc));
 }
 
 static inline void
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
index ecfb0d6ba6..c6e90b8d41 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -10,83 +10,14 @@
 #include "ixgbe_rxtx.h"
 #include "ixgbe_rxtx_vec_common.h"
 
+#include "../common/rx_vec_sse.h"
+
 #include <rte_vect.h>
 
 static inline void
 ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	const __m128i hba_msk = _mm_set_epi64x(0, UINT64_MAX);
-
-	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 RTE_IXGBE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_IXGBE_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr));
-		vaddr1 = _mm_loadu_si128((__m128i *)&(mb1->buf_addr));
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* set Header Buffer Address to zero */
-		dma_addr0 =  _mm_and_si128(dma_addr0, hba_msk);
-		dma_addr1 =  _mm_and_si128(dma_addr1, hba_msk);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t) ((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union ixgbe_adv_rx_desc), CI_RX_VEC_LEVEL_SSE);
 }
 
 #ifdef RTE_LIB_SECURITY
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v2 11/13] net/intel: support wider x86 vectors for Rx rearm
  2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (8 preceding siblings ...)
  2025-05-12 10:58   ` [PATCH v2 10/13] net/ixgbe: " Anatoly Burakov
@ 2025-05-12 10:58   ` Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 12/13] net/intel: add common Rx mbuf recycle Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 13/13] net/intel: add common Tx " Anatoly Burakov
  11 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 10:58 UTC (permalink / raw)
  To: dev, Bruce Richardson

Currently, for the 32-byte descriptor format, only the SSE instruction set is
supported. Add implementations for the AVX2 and AVX512 instruction sets. These
implementations similarly constant-propagate everything at compile time and
thus should not affect the performance of existing code paths. To improve code
readability and reduce code duplication when supporting descriptors of
different sizes, the implementation is also refactored.
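
The key to the refactor is that the number of descriptors per register and per
loop iteration is derived from the descriptor size at compile time, so for any
given caller the branches collapse to a single code path; a minimal sketch of
the idea, following the AVX2 path below:

    /* 2 descriptors per 256-bit register for 16-byte descriptors,
     * 1 per register for 32-byte descriptors
     */
    const uint8_t desc_per_reg = sizeof(__m256i) / desc_len;
    /* two register stores per loop iteration */
    const uint8_t desc_per_iter = desc_per_reg * 2;

    if (desc_per_iter == 2) {
            /* 32-byte descriptors: one address per register, with the
             * upper half of each register left zeroed
             */
    } else {
            /* 16-byte descriptors: two addresses packed per register */
    }

Since desc_len is a compile-time constant at every call site, the unused branch
is eliminated by the compiler and no extra runtime cost is added.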

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx_vec_sse.h | 380 ++++++++++++++------------
 1 file changed, 205 insertions(+), 175 deletions(-)

diff --git a/drivers/net/intel/common/rx_vec_sse.h b/drivers/net/intel/common/rx_vec_sse.h
index 6fe0baf38b..0aeaac3dc9 100644
--- a/drivers/net/intel/common/rx_vec_sse.h
+++ b/drivers/net/intel/common/rx_vec_sse.h
@@ -48,223 +48,258 @@ _ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq, const size_t desc_len)
 	return 0;
 }
 
-/*
- * SSE code path can handle both 16-byte and 32-byte descriptors with one code
- * path, as we only ever write 16 bytes at a time.
- */
-static __rte_always_inline void
-_ci_rxq_rearm_sse(struct ci_rx_queue *rxq, const size_t desc_len)
+static __rte_always_inline __m128i
+_ci_rxq_rearm_desc_sse(const __m128i vaddr)
 {
 	const __m128i hdr_room = _mm_set1_epi64x(RTE_PKTMBUF_HEADROOM);
 	const __m128i zero = _mm_setzero_si128();
+	__m128i reg;
+
+	/* add headroom to address values */
+	reg = _mm_add_epi64(vaddr, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+	/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+			 offsetof(struct rte_mbuf, buf_addr) + 8);
+	/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
+	reg = _mm_unpackhi_epi64(reg, zero);
+#else
+	/* erase Header Buffer Address */
+	reg = _mm_unpacklo_epi64(reg, zero);
+#endif
+	return reg;
+}
+
+static __rte_always_inline void
+_ci_rxq_rearm_sse(struct ci_rx_queue *rxq, const size_t desc_len)
+{
 	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
 	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint8_t desc_per_reg = 1;
+	const uint8_t desc_per_iter = desc_per_reg * 2;
 	volatile void *rxdp;
 	int i;
 
 	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp = RTE_PTR_ADD(rxdp, 2 * desc_len)) {
+	for (i = 0; i < rearm_thresh;
+			i += desc_per_iter,
+			rxp += desc_per_iter,
+			rxdp = RTE_PTR_ADD(rxdp, desc_per_iter * desc_len)) {
 		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
-		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len);
-		__m128i vaddr0, vaddr1;
-		__m128i dma_addr0, dma_addr1;
-		struct rte_mbuf *mb0, *mb1;
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * desc_per_reg);
+		const struct rte_mbuf *mb0 = rxp[0].mbuf;
+		const struct rte_mbuf *mb1 = rxp[1].mbuf;
 
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
+		const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+		const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
 
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* add headroom to address values */
-		vaddr0 = _mm_add_epi64(vaddr0, hdr_room);
-		vaddr1 = _mm_add_epi64(vaddr1, hdr_room);
-
-#if RTE_IOVA_IN_MBUF
-		/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, zero);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, zero);
-#else
-		/* erase Header Buffer Address */
-		dma_addr0 = _mm_unpacklo_epi64(vaddr0, zero);
-		dma_addr1 = _mm_unpacklo_epi64(vaddr1, zero);
-#endif
+		const __m128i reg0 = _ci_rxq_rearm_desc_sse(vaddr0);
+		const __m128i reg1 = _ci_rxq_rearm_desc_sse(vaddr1);
 
 		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr0), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr1), dma_addr1);
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr0), reg0);
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr1), reg1);
 	}
 }
 
 #ifdef __AVX2__
-/* AVX2 version for 16-byte descriptors, handles 4 buffers at a time */
-static __rte_always_inline void
-_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq)
+static __rte_always_inline __m256i
+_ci_rxq_rearm_desc_avx2(const __m128i vaddr0, const __m128i vaddr1)
 {
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
-	const size_t desc_len = 16;
-	volatile void *rxdp;
 	const __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
 	const __m256i zero = _mm256_setzero_si256();
+	__m256i reg;
+
+	/* merge by casting 0 to 256-bit and inserting 1 into the high lanes */
+	reg =
+		_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+					vaddr1, 1);
+
+	/* add headroom to address values */
+	reg = _mm256_add_epi64(reg, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+	/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+			offsetof(struct rte_mbuf, buf_addr) + 8);
+	/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+	reg = _mm256_unpackhi_epi64(reg, zero);
+#else
+	/* erase Header Buffer Address */
+	reg = _mm256_unpacklo_epi64(reg, zero);
+#endif
+	return reg;
+}
+
+static __rte_always_inline void
+_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	/* how many descriptors can fit into a register */
+	const uint8_t desc_per_reg = sizeof(__m256i) / desc_len;
+	/* how many descriptors can fit into one loop iteration */
+	const uint8_t desc_per_iter = desc_per_reg * 2;
+	volatile void *rxdp;
 	int i;
 
 	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
 
-	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < rearm_thresh; i += 4, rxp += 4, rxdp = RTE_PTR_ADD(rxdp, 4 * desc_len)) {
+	/* Initialize the mbufs in vector, process 2 or 4 mbufs in one loop */
+	for (i = 0; i < rearm_thresh;
+			i += desc_per_iter,
+			rxp += desc_per_iter,
+			rxdp = RTE_PTR_ADD(rxdp, desc_per_iter * desc_len)) {
 		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
-		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 2);
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m256i vaddr0_1, vaddr2_3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * desc_per_reg);
+		__m256i reg0, reg1;
 
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-		mb2 = rxp[2].mbuf;
-		mb3 = rxp[3].mbuf;
+		if (desc_per_iter == 2) {
+			/* 16 byte descriptor, 16 byte zero, times two */
+			const __m128i zero = _mm_setzero_si128();
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
 
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
 
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3
-		 */
-		vaddr0_1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-						vaddr1, 1);
-		vaddr2_3 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-						vaddr3, 1);
+			reg0 = _ci_rxq_rearm_desc_avx2(vaddr0, zero);
+			reg1 = _ci_rxq_rearm_desc_avx2(vaddr1, zero);
+		} else {
+			/* 16 byte descriptor times four */
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
+			const struct rte_mbuf *mb2 = rxp[2].mbuf;
+			const struct rte_mbuf *mb3 = rxp[3].mbuf;
 
-		/* add headroom to address values */
-		vaddr0_1 = _mm256_add_epi64(vaddr0_1, hdr_room);
-		vaddr0_1 = _mm256_add_epi64(vaddr0_1, hdr_room);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+			const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+			const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
 
-#if RTE_IOVA_IN_MBUF
-		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
-		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, zero);
-		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, zero);
-#else
-		/* erase Header Buffer Address */
-		dma_addr0_1 = _mm256_unpacklo_epi64(vaddr0_1, zero);
-		dma_addr2_3 = _mm256_unpacklo_epi64(vaddr2_3, zero);
-#endif
+			reg0 = _ci_rxq_rearm_desc_avx2(vaddr0, vaddr1);
+			reg1 = _ci_rxq_rearm_desc_avx2(vaddr2, vaddr3);
+		}
 
 		/* flush desc with pa dma_addr */
-		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr0), dma_addr0_1);
-		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr1), dma_addr2_3);
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr0), reg0);
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr1), reg1);
 	}
 }
 #endif /* __AVX2__ */
 
 #ifdef __AVX512VL__
-/* AVX512 version for 16-byte descriptors, handles 8 buffers at a time */
+static __rte_always_inline __m512i
+_ci_rxq_rearm_desc_avx512(const __m128i vaddr0, const __m128i vaddr1,
+		const __m128i vaddr2, const __m128i vaddr3)
+{
+	const __m512i zero = _mm512_setzero_si512();
+	const __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+	__m256i vaddr0_1, vaddr2_3;
+	__m512i reg;
+
+	/**
+	 * merge 0 & 1, by casting 0 to 256-bit and inserting 1 into the high
+	 * lanes. Similarly for 2 & 3.
+	 */
+	vaddr0_1 =
+		_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+					vaddr1, 1);
+	vaddr2_3 =
+		_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+					vaddr3, 1);
+	/*
+	 * merge 0+1 & 2+3, by casting 0+1 to 512-bit and inserting 2+3 into the
+	 * high lanes.
+	 */
+	reg =
+		_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+					vaddr2_3, 1);
+
+	/* add headroom to address values */
+	reg = _mm512_add_epi64(reg, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+	/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+			offsetof(struct rte_mbuf, buf_addr) + 8);
+	/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+	reg = _mm512_unpackhi_epi64(reg, zero);
+#else
+	/* erase Header Buffer Address */
+	reg = _mm512_unpacklo_epi64(reg, zero);
+#endif
+	return reg;
+}
+
 static __rte_always_inline void
-_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq)
+_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq, const size_t desc_len)
 {
 	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
 	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
-	const size_t desc_len = 16;
+	/* how many descriptors can fit into a register */
+	const uint8_t desc_per_reg = sizeof(__m512i) / desc_len;
+	/* how many descriptors can fit into one loop iteration */
+	const uint8_t desc_per_iter = desc_per_reg * 2;
 	volatile void *rxdp;
 	int i;
-	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-	struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-	__m512i dma_addr0_3, dma_addr4_7;
-	__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-	__m512i zero = _mm512_setzero_si512();
 
 	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
 
-	/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-	for (i = 0; i < rearm_thresh; i += 8, rxp += 8, rxdp = RTE_PTR_ADD(rxdp, 8 * desc_len)) {
+	/* Initialize the mbufs in vector, process 4 or 8 mbufs in one loop */
+	for (i = 0; i < rearm_thresh;
+			i += desc_per_iter,
+			rxp += desc_per_iter,
+			rxdp = RTE_PTR_ADD(rxdp, desc_per_iter * desc_len)) {
 		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
-		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 4);
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-		__m256i vaddr0_1, vaddr2_3;
-		__m256i vaddr4_5, vaddr6_7;
-		__m512i vaddr0_3, vaddr4_7;
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * desc_per_reg);
+		__m512i reg0, reg1;
 
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-		mb2 = rxp[2].mbuf;
-		mb3 = rxp[3].mbuf;
-		mb4 = rxp[4].mbuf;
-		mb5 = rxp[5].mbuf;
-		mb6 = rxp[6].mbuf;
-		mb7 = rxp[7].mbuf;
+		if (desc_per_iter == 4) {
+			/* 16-byte descriptor, 16 byte zero, times four */
+			const __m128i zero = _mm_setzero_si128();
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
+			const struct rte_mbuf *mb2 = rxp[2].mbuf;
+			const struct rte_mbuf *mb3 = rxp[3].mbuf;
 
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-		vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-		vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-		vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-		vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+			const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+			const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
 
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3, and so on.
-		 */
-		vaddr0_1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-						vaddr1, 1);
-		vaddr2_3 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-						vaddr3, 1);
-		vaddr4_5 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-						vaddr5, 1);
-		vaddr6_7 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-						vaddr7, 1);
-		vaddr0_3 =
-			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-						vaddr2_3, 1);
-		vaddr4_7 =
-			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-						vaddr6_7, 1);
+			reg0 = _ci_rxq_rearm_desc_avx512(vaddr0, zero, vaddr1, zero);
+			reg1 = _ci_rxq_rearm_desc_avx512(vaddr2, zero, vaddr3, zero);
+		} else {
+			/* 16-byte descriptor times eight */
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
+			const struct rte_mbuf *mb2 = rxp[2].mbuf;
+			const struct rte_mbuf *mb3 = rxp[3].mbuf;
+			const struct rte_mbuf *mb4 = rxp[4].mbuf;
+			const struct rte_mbuf *mb5 = rxp[5].mbuf;
+			const struct rte_mbuf *mb6 = rxp[6].mbuf;
+			const struct rte_mbuf *mb7 = rxp[7].mbuf;
 
-		/* add headroom to address values */
-		vaddr0_3 = _mm512_add_epi64(vaddr0_3, hdr_room);
-		dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+			const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+			const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
+			const __m128i vaddr4 = _mm_loadu_si128((const __m128i *)&mb4->buf_addr);
+			const __m128i vaddr5 = _mm_loadu_si128((const __m128i *)&mb5->buf_addr);
+			const __m128i vaddr6 = _mm_loadu_si128((const __m128i *)&mb6->buf_addr);
+			const __m128i vaddr7 = _mm_loadu_si128((const __m128i *)&mb7->buf_addr);
 
-#if RTE_IOVA_IN_MBUF
-		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
-		dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, zero);
-		dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, zero);
-#else
-		/* erase Header Buffer Address */
-		dma_addr0_3 = _mm512_unpacklo_epi64(vaddr0_3, zero);
-		dma_addr4_7 = _mm512_unpacklo_epi64(vaddr4_7, zero);
-#endif
+			reg0 = _ci_rxq_rearm_desc_avx512(vaddr0, vaddr1, vaddr2, vaddr3);
+			reg1 = _ci_rxq_rearm_desc_avx512(vaddr4, vaddr5, vaddr6, vaddr7);
+		}
 
 		/* flush desc with pa dma_addr */
-		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr0), dma_addr0_3);
-		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr1), dma_addr4_7);
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr0), reg0);
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr1), reg1);
 	}
 }
 #endif /* __AVX512VL__ */
@@ -280,31 +315,26 @@ ci_rxq_rearm(struct ci_rx_queue *rxq, const size_t desc_len,
 	if (_ci_rxq_rearm_get_bufs(rxq, desc_len) < 0)
 		return;
 
-	if (desc_len == 16) {
-		switch (vec_level) {
-		case CI_RX_VEC_LEVEL_AVX512:
+	switch (vec_level) {
+	case CI_RX_VEC_LEVEL_AVX512:
 #ifdef __AVX512VL__
-			_ci_rxq_rearm_avx512(rxq);
-			break;
+		_ci_rxq_rearm_avx512(rxq, desc_len);
+		break;
 #else
-			/* fall back to AVX2 unless requested not to */
-			/* fall through */
+		/* fall back to AVX2 unless requested not to */
+		/* fall through */
 #endif
-		case CI_RX_VEC_LEVEL_AVX2:
+	case CI_RX_VEC_LEVEL_AVX2:
 #ifdef __AVX2__
-			_ci_rxq_rearm_avx2(rxq);
+			_ci_rxq_rearm_avx2(rxq, desc_len);
 			break;
 #else
 			/* fall back to SSE if AVX2 isn't supported */
 			/* fall through */
 #endif
-		case CI_RX_VEC_LEVEL_SSE:
-			_ci_rxq_rearm_sse(rxq, desc_len);
-			break;
-		}
-	} else {
-		/* for 32-byte descriptors only support SSE */
+	case CI_RX_VEC_LEVEL_SSE:
 		_ci_rxq_rearm_sse(rxq, desc_len);
+		break;
 	}
 
 	rxq->rxrearm_start += rearm_thresh;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v2 12/13] net/intel: add common Rx mbuf recycle
  2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (9 preceding siblings ...)
  2025-05-12 10:58   ` [PATCH v2 11/13] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
@ 2025-05-12 10:58   ` Anatoly Burakov
  2025-05-12 10:58   ` [PATCH v2 13/13] net/intel: add common Tx " Anatoly Burakov
  11 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 10:58 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes, Vladimir Medvedkin

Currently, there are duplicate implementations of Rx mbuf recycle in some
drivers, specifically ixgbe and i40e. Move them into a common header.

While we're at it, also support the no-IOVA-in-mbuf case.
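
To illustrate the intended usage (not part of the patch itself): with the
common helper in place, a driver's vector refill callback reduces to a thin
wrapper that only supplies its descriptor size. A minimal sketch, assuming a
hypothetical "foo" driver with "union foo_rx_desc" descriptors:

  #include "../common/recycle_mbufs.h"

  /* illustrative wrapper only - the "foo" names are placeholders */
  void
  foo_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
  {
  	/* the helper walks the descriptor ring generically, so only the
  	 * per-driver descriptor size needs to be passed in */
  	ci_rx_recycle_mbufs(rx_queue, nb_mbufs, sizeof(union foo_rx_desc));
  }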

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/recycle_mbufs.h      | 67 +++++++++++++++++++
 .../i40e/i40e_recycle_mbufs_vec_common.c      | 37 +---------
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    | 35 +---------
 3 files changed, 73 insertions(+), 66 deletions(-)
 create mode 100644 drivers/net/intel/common/recycle_mbufs.h

diff --git a/drivers/net/intel/common/recycle_mbufs.h b/drivers/net/intel/common/recycle_mbufs.h
new file mode 100644
index 0000000000..fd31c5c1ff
--- /dev/null
+++ b/drivers/net/intel/common/recycle_mbufs.h
@@ -0,0 +1,67 @@
+#ifndef _COMMON_INTEL_RECYCLE_MBUFS_H_
+#define _COMMON_INTEL_RECYCLE_MBUFS_H_
+
+#include <stdint.h>
+#include <unistd.h>
+
+#include <rte_mbuf.h>
+#include <rte_io.h>
+#include <ethdev_driver.h>
+
+#include "rx.h"
+#include "tx.h"
+
+/**
+ * Recycle mbufs for Rx queue.
+ *
+ * @param rxq Rx queue pointer
+ * @param nb_mbufs number of mbufs to recycle
+ * @param desc_len length of Rx descriptor
+ */
+static __rte_always_inline void
+ci_rx_recycle_mbufs(struct ci_rx_queue *rxq, const uint16_t nb_mbufs,
+		const size_t desc_len)
+{
+	struct ci_rx_entry *rxep;
+	volatile void *rxdp;
+	uint16_t rx_id;
+	uint16_t i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+	rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	for (i = 0; i < nb_mbufs; i++) {
+		volatile uint64_t *cur = RTE_PTR_ADD(rxdp, i * desc_len);
+
+#if RTE_IOVA_IN_MBUF
+		const uint64_t paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
+		const uint64_t dma_addr = rte_cpu_to_le_64(paddr);
+#else
+		const uint64_t vaddr = (uintptr_t)rxep[i].mbuf->buf_addr +
+			RTE_PKTMBUF_HEADROOM;
+		const uint64_t dma_addr = rte_cpu_to_le_64(vaddr);
+#endif
+
+		/* 8 bytes PBA followed by 8 bytes HBA */
+		*(cur + 1) = 0;
+		*cur = dma_addr;
+	}
+
+	/* Update the descriptor initializer index */
+	rxq->rxrearm_start += nb_mbufs;
+	rx_id = rxq->rxrearm_start - 1;
+
+	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+		rxq->rxrearm_start = 0;
+		rx_id = rxq->nb_rx_desc - 1;
+	}
+
+	rxq->rxrearm_nb -= nb_mbufs;
+
+	rte_io_wmb();
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc_relaxed(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif
diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
index aa7703216d..073357bee2 100644
--- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
@@ -10,43 +10,12 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 
+#include "../common/recycle_mbufs.h"
+
 void
 i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct ci_rx_queue *rxq = rx_queue;
-	struct ci_rx_entry *rxep;
-	volatile union i40e_rx_desc *rxdp;
-	uint16_t rx_id;
-	uint64_t paddr;
-	uint64_t dma_addr;
-	uint16_t i;
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-	rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	for (i = 0; i < nb_mbufs; i++) {
-		/* Initialize rxdp descs. */
-		paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr = rte_cpu_to_le_64(paddr);
-		/* flush desc with pa dma_addr */
-		rxdp[i].read.hdr_addr = 0;
-		rxdp[i].read.pkt_addr = dma_addr;
-	}
-
-	/* Update the descriptor initializer index */
-	rxq->rxrearm_start += nb_mbufs;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= nb_mbufs;
-
-	rte_io_wmb();
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	ci_rx_recycle_mbufs(rx_queue, nb_mbufs, sizeof(union i40e_rx_desc));
 }
 
 uint16_t
diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index 1df1787c7f..e2c3523ed2 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -8,41 +8,12 @@
 #include "ixgbe_ethdev.h"
 #include "ixgbe_rxtx.h"
 
+#include "../common/recycle_mbufs.h"
+
 void
 ixgbe_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct ci_rx_queue *rxq = rx_queue;
-	struct ci_rx_entry *rxep;
-	volatile union ixgbe_adv_rx_desc *rxdp;
-	uint16_t rx_id;
-	uint64_t paddr;
-	uint64_t dma_addr;
-	uint16_t i;
-
-	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
-	rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	for (i = 0; i < nb_mbufs; i++) {
-		/* Initialize rxdp descs. */
-		paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr = rte_cpu_to_le_64(paddr);
-		/* Flush descriptors with pa dma_addr */
-		rxdp[i].read.hdr_addr = 0;
-		rxdp[i].read.pkt_addr = dma_addr;
-	}
-
-	/* Update the descriptor initializer index */
-	rxq->rxrearm_start += nb_mbufs;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= nb_mbufs;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			(rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	ci_rx_recycle_mbufs(rx_queue, nb_mbufs, sizeof(union ixgbe_adv_rx_desc));
 }
 
 uint16_t
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v2 13/13] net/intel: add common Tx mbuf recycle
  2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (10 preceding siblings ...)
  2025-05-12 10:58   ` [PATCH v2 12/13] net/intel: add common Rx mbuf recycle Anatoly Burakov
@ 2025-05-12 10:58   ` Anatoly Burakov
  11 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 10:58 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes, Vladimir Medvedkin

Currently, there are duplicate implementations of Tx mbuf recycle in some
drivers, specifically ixgbe and i40e. Move them into a common header.
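
For context (illustrative, not part of the patch): each driver keeps only its
hardware-specific "descriptor done" check and delegates the rest of the
recycle logic to the common helper. A minimal sketch, where foo_tx_desc_done()
is a hypothetical per-driver predicate:

  uint16_t
  foo_recycle_tx_mbufs_reuse_vec(void *tx_queue,
  		struct rte_eth_recycle_rxq_info *recycle_rxq_info)
  {
  	struct ci_tx_queue *txq = tx_queue;

  	/* driver-specific: is the threshold descriptor written back? */
  	if (!foo_tx_desc_done(txq, txq->tx_next_dd))
  		return 0;

  	/* common: move/free mbufs and update the Tx bookkeeping */
  	return ci_tx_recycle_mbufs(txq, recycle_rxq_info);
  }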

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/recycle_mbufs.h      | 98 +++++++++++++++++++
 drivers/net/intel/common/tx.h                 |  1 +
 .../i40e/i40e_recycle_mbufs_vec_common.c      | 88 +----------------
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    | 89 +----------------
 4 files changed, 107 insertions(+), 169 deletions(-)

diff --git a/drivers/net/intel/common/recycle_mbufs.h b/drivers/net/intel/common/recycle_mbufs.h
index fd31c5c1ff..88779c5aa4 100644
--- a/drivers/net/intel/common/recycle_mbufs.h
+++ b/drivers/net/intel/common/recycle_mbufs.h
@@ -64,4 +64,102 @@ ci_rx_recycle_mbufs(struct ci_rx_queue *rxq, const uint16_t nb_mbufs,
 	rte_write32_wc_relaxed(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
 }
 
+/**
+ * Recycle buffers on Tx. Note: the caller must first perform a driver-specific
+ * DD-bit-set check to ensure that the Tx descriptors are ready for recycling.
+ *
+ * @param txq Tx queue pointer
+ * @param recycle_rxq_info recycling mbuf information
+ *
+ * @return how many buffers were recycled
+ */
+static __rte_always_inline uint16_t
+ci_tx_recycle_mbufs(struct ci_tx_queue *txq,
+	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+	struct ci_tx_entry *txep;
+	struct rte_mbuf **rxep;
+	int i, n;
+	uint16_t nb_recycle_mbufs;
+	uint16_t avail = 0;
+	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
+	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
+	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
+	uint16_t refill_head = *recycle_rxq_info->refill_head;
+	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+
+	/* Get available recycling Rx buffers. */
+	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
+
+	/* Check Tx free thresh and Rx available space. */
+	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
+		return 0;
+
+	n = txq->tx_rs_thresh;
+	nb_recycle_mbufs = n;
+
+	/* Mbufs recycle mode can only support no ring buffer wrapping around.
+	 * Two case for this:
+	 *
+	 * case 1: The refill head of Rx buffer ring needs to be aligned with
+	 * mbuf ring size. In this case, the number of Tx freeing buffers
+	 * should be equal to refill_requirement.
+	 *
+	 * case 2: The refill head of Rx ring buffer does not need to be aligned
+	 * with mbuf ring size. In this case, the update of refill head can not
+	 * exceed the Rx mbuf ring size.
+	 */
+	if ((refill_requirement && refill_requirement != n) ||
+		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
+		return 0;
+
+	/* First buffer to free from S/W ring is at index
+	 * tx_next_dd - (tx_rs_thresh-1).
+	 */
+	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+	rxep = recycle_rxq_info->mbuf_ring;
+	rxep += refill_head;
+
+	/* is fast-free enabled in offloads? */
+	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+		/* Avoid txq containing buffers from unexpected mempool. */
+		if (unlikely(recycle_rxq_info->mp
+					!= txep[0].mbuf->pool))
+			return 0;
+
+		/* Directly put mbufs from Tx to Rx. */
+		for (i = 0; i < n; i++)
+			rxep[i] = txep[i].mbuf;
+	} else {
+		for (i = 0; i < n; i++) {
+			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+
+			/* If Tx buffers are not the last reference or from
+			 * unexpected mempool, previous copied buffers are
+			 * considered as invalid.
+			 */
+			if (unlikely(rxep[i] == NULL ||
+				recycle_rxq_info->mp != txep[i].mbuf->pool))
+				nb_recycle_mbufs = 0;
+		}
+		/* If Tx buffers are not the last reference or
+		 * from unexpected mempool, all recycled buffers
+		 * are put into mempool.
+		 */
+		if (nb_recycle_mbufs == 0)
+			for (i = 0; i < n; i++) {
+				if (rxep[i] != NULL)
+					rte_mempool_put(rxep[i]->pool, rxep[i]);
+			}
+	}
+
+	/* Update counters for Tx. */
+	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+	if (txq->tx_next_dd >= txq->nb_tx_desc)
+		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+	return nb_recycle_mbufs;
+}
+
 #endif
diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index c99bd5420f..cc70fa7db4 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -37,6 +37,7 @@ struct ci_tx_queue {
 		volatile struct ice_tx_desc *ice_tx_ring;
 		volatile struct idpf_base_tx_desc *idpf_tx_ring;
 		volatile union ixgbe_adv_tx_desc *ixgbe_tx_ring;
+		volatile void *tx_ring; /**< Generic. */
 	};
 	volatile uint8_t *qtx_tail;               /* register address of tail */
 	union {
diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
index 073357bee2..19edee781d 100644
--- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
@@ -23,92 +23,12 @@ i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
 	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
 	struct ci_tx_queue *txq = tx_queue;
-	struct ci_tx_entry *txep;
-	struct rte_mbuf **rxep;
-	int i, n;
-	uint16_t nb_recycle_mbufs;
-	uint16_t avail = 0;
-	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
-	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
-	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
-	uint16_t refill_head = *recycle_rxq_info->refill_head;
-	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+	const uint64_t ctob = txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz;
 
-	/* Get available recycling Rx buffers. */
-	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
-
-	/* Check Tx free thresh and Rx available space. */
-	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
-		return 0;
-
-	/* check DD bits on threshold descriptor */
-	if ((txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
-				rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+	/* are Tx descriptors ready for recycling? */
+	if ((ctob & rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
 			rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
 		return 0;
 
-	n = txq->tx_rs_thresh;
-	nb_recycle_mbufs = n;
-
-	/* Mbufs recycle mode can only support no ring buffer wrapping around.
-	 * Two case for this:
-	 *
-	 * case 1: The refill head of Rx buffer ring needs to be aligned with
-	 * mbuf ring size. In this case, the number of Tx freeing buffers
-	 * should be equal to refill_requirement.
-	 *
-	 * case 2: The refill head of Rx ring buffer does not need to be aligned
-	 * with mbuf ring size. In this case, the update of refill head can not
-	 * exceed the Rx mbuf ring size.
-	 */
-	if ((refill_requirement && refill_requirement != n) ||
-		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
-		return 0;
-
-	/* First buffer to free from S/W ring is at index
-	 * tx_next_dd - (tx_rs_thresh-1).
-	 */
-	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
-	rxep = recycle_rxq_info->mbuf_ring;
-	rxep += refill_head;
-
-	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
-		/* Avoid txq contains buffers from unexpected mempool. */
-		if (unlikely(recycle_rxq_info->mp
-					!= txep[0].mbuf->pool))
-			return 0;
-
-		/* Directly put mbufs from Tx to Rx. */
-		for (i = 0; i < n; i++)
-			rxep[i] = txep[i].mbuf;
-	} else {
-		for (i = 0; i < n; i++) {
-			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
-
-			/* If Tx buffers are not the last reference or from
-			 * unexpected mempool, previous copied buffers are
-			 * considered as invalid.
-			 */
-			if (unlikely(rxep[i] == NULL ||
-				recycle_rxq_info->mp != txep[i].mbuf->pool))
-				nb_recycle_mbufs = 0;
-		}
-		/* If Tx buffers are not the last reference or
-		 * from unexpected mempool, all recycled buffers
-		 * are put into mempool.
-		 */
-		if (nb_recycle_mbufs == 0)
-			for (i = 0; i < n; i++) {
-				if (rxep[i] != NULL)
-					rte_mempool_put(rxep[i]->pool, rxep[i]);
-			}
-	}
-
-	/* Update counters for Tx. */
-	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
-	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
-	if (txq->tx_next_dd >= txq->nb_tx_desc)
-		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-
-	return nb_recycle_mbufs;
+	return ci_tx_recycle_mbufs(txq, recycle_rxq_info);
 }
diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index e2c3523ed2..179205b422 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -21,92 +21,11 @@ ixgbe_recycle_tx_mbufs_reuse_vec(void *tx_queue,
 		struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
 	struct ci_tx_queue *txq = tx_queue;
-	struct ci_tx_entry *txep;
-	struct rte_mbuf **rxep;
-	int i, n;
-	uint32_t status;
-	uint16_t nb_recycle_mbufs;
-	uint16_t avail = 0;
-	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
-	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
-	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
-	uint16_t refill_head = *recycle_rxq_info->refill_head;
-	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+	const uint32_t status = txq->ixgbe_tx_ring[txq->tx_next_dd].wb.status;
 
-	/* Get available recycling Rx buffers. */
-	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
-
-	/* Check Tx free thresh and Rx available space. */
-	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
-		return 0;
-
-	/* check DD bits on threshold descriptor */
-	status = txq->ixgbe_tx_ring[txq->tx_next_dd].wb.status;
-	if (!(status & IXGBE_ADVTXD_STAT_DD))
-		return 0;
-
-	n = txq->tx_rs_thresh;
-	nb_recycle_mbufs = n;
-
-	/* Mbufs recycle can only support no ring buffer wrapping around.
-	 * Two case for this:
-	 *
-	 * case 1: The refill head of Rx buffer ring needs to be aligned with
-	 * buffer ring size. In this case, the number of Tx freeing buffers
-	 * should be equal to refill_requirement.
-	 *
-	 * case 2: The refill head of Rx ring buffer does not need to be aligned
-	 * with buffer ring size. In this case, the update of refill head can not
-	 * exceed the Rx buffer ring size.
-	 */
-	if ((refill_requirement && refill_requirement != n) ||
-		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
+	/* are Tx descriptors ready for recycling? */
+	if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
 		return 0;
 
-	/* First buffer to free from S/W ring is at index
-	 * tx_next_dd - (tx_rs_thresh-1).
-	 */
-	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
-	rxep = recycle_rxq_info->mbuf_ring;
-	rxep += refill_head;
-
-	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
-		/* Avoid txq contains buffers from unexpected mempool. */
-		if (unlikely(recycle_rxq_info->mp
-					!= txep[0].mbuf->pool))
-			return 0;
-
-		/* Directly put mbufs from Tx to Rx. */
-		for (i = 0; i < n; i++)
-			rxep[i] = txep[i].mbuf;
-	} else {
-		for (i = 0; i < n; i++) {
-			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
-
-			/* If Tx buffers are not the last reference or from
-			 * unexpected mempool, previous copied buffers are
-			 * considered as invalid.
-			 */
-			if (unlikely(rxep[i] == NULL ||
-				recycle_rxq_info->mp != txep[i].mbuf->pool))
-				nb_recycle_mbufs = 0;
-		}
-		/* If Tx buffers are not the last reference or
-		 * from unexpected mempool, all recycled buffers
-		 * are put into mempool.
-		 */
-		if (nb_recycle_mbufs == 0)
-			for (i = 0; i < n; i++) {
-				if (rxep[i] != NULL)
-					rte_mempool_put(rxep[i]->pool, rxep[i]);
-			}
-	}
-
-	/* Update counters for Tx. */
-	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
-	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
-	if (txq->tx_next_dd >= txq->nb_tx_desc)
-		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-
-	return nb_recycle_mbufs;
+	return ci_tx_recycle_mbufs(tx_queue, recycle_rxq_info);
 }
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (12 preceding siblings ...)
  2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
@ 2025-05-12 12:54 ` Anatoly Burakov
  2025-05-12 12:54   ` [PATCH v3 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
                     ` (13 more replies)
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanp Anatoly Burakov
  14 siblings, 14 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 12:54 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin; +Cc: bruce.richardson

The `rdh` (read head) field in the `ixgbe_rx_queue` struct is not used
anywhere in the codebase, and can be removed.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/ixgbe/ixgbe_rxtx.c | 9 ++-------
 drivers/net/intel/ixgbe/ixgbe_rxtx.h | 1 -
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index 95c80ac1b8..0c07ce3186 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -3296,17 +3296,12 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	/*
 	 * Modified to setup VFRDT for Virtual Function
 	 */
-	if (ixgbe_is_vf(dev)) {
+	if (ixgbe_is_vf(dev))
 		rxq->rdt_reg_addr =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
-		rxq->rdh_reg_addr =
-			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
-	} else {
+	else
 		rxq->rdt_reg_addr =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
-		rxq->rdh_reg_addr =
-			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
-	}
 
 	rxq->rx_ring_phys_addr = rz->iova;
 	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index 641f982b01..20a5c5a0af 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -85,7 +85,6 @@ struct ixgbe_rx_queue {
 	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
 	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
 	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
-	volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
 	struct ixgbe_rx_entry *sw_ring; /**< address of RX software ring. */
 	struct ixgbe_scattered_rx_entry *sw_sc_ring; /**< address of scattered Rx software ring. */
 	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v3 02/13] net/iavf: make IPsec stats dynamically allocated
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
@ 2025-05-12 12:54   ` Anatoly Burakov
  2025-05-14 16:39     ` Bruce Richardson
  2025-05-12 12:54   ` [PATCH v3 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
                     ` (12 subsequent siblings)
  13 siblings, 1 reply; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 12:54 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin, Ian Stokes; +Cc: bruce.richardson

Currently, the stats structure is directly embedded in the queue structure.
We're about to move the iavf driver to a common Rx queue structure, so we
can't have driver-specific structures that aren't pointers inside the common
queue structure. To prepare, we replace the directly embedded stats structure
with a pointer to a dynamically allocated stats structure.
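
For context, a sketch of why a pointer is needed (purely hypothetical layout;
only struct iavf_rx_queue_stats is a real driver type): the planned common
queue structure keeps driver-specific state in a union, and a pointer keeps
the common layout independent of any one driver's stats definition.

  struct hypothetical_ci_rx_queue {
  	/* ...fields shared by all Intel drivers... */
  	union { /* per-driver specifics */
  		struct { /* iavf */
  			/* pointer, so the union size stays driver-agnostic */
  			struct iavf_rx_queue_stats *stats;
  		};
  		/* ...other drivers' fields... */
  	};
  };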

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/iavf/iavf_ethdev.c |  2 +-
 drivers/net/intel/iavf/iavf_rxtx.c   | 21 ++++++++++++++++++---
 drivers/net/intel/iavf/iavf_rxtx.h   |  2 +-
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/net/intel/iavf/iavf_ethdev.c b/drivers/net/intel/iavf/iavf_ethdev.c
index b3dacbef84..5babd587b3 100644
--- a/drivers/net/intel/iavf/iavf_ethdev.c
+++ b/drivers/net/intel/iavf/iavf_ethdev.c
@@ -1870,7 +1870,7 @@ iavf_dev_update_ipsec_xstats(struct rte_eth_dev *ethdev,
 		struct iavf_rx_queue *rxq;
 		struct iavf_ipsec_crypto_stats *stats;
 		rxq = (struct iavf_rx_queue *)ethdev->data->rx_queues[idx];
-		stats = &rxq->stats.ipsec_crypto;
+		stats = &rxq->stats->ipsec_crypto;
 		ips->icount += stats->icount;
 		ips->ibytes += stats->ibytes;
 		ips->ierrors.count += stats->ierrors.count;
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index 5411eb6897..d23d2df807 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -619,6 +619,18 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 		return -ENOMEM;
 	}
 
+	/* Allocate stats */
+	rxq->stats = rte_zmalloc_socket("iavf rxq stats",
+				 sizeof(struct iavf_rx_queue_stats),
+				 RTE_CACHE_LINE_SIZE,
+				 socket_id);
+	if (!rxq->stats) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for "
+			     "rx queue stats");
+		rte_free(rxq);
+		return -ENOMEM;
+	}
+
 	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
 		proto_xtr = vf->proto_xtr ? vf->proto_xtr[queue_idx] :
 				IAVF_PROTO_XTR_NONE;
@@ -677,6 +689,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 				   socket_id);
 	if (!rxq->sw_ring) {
 		PMD_INIT_LOG(ERR, "Failed to allocate memory for SW ring");
+		rte_free(rxq->stats);
 		rte_free(rxq);
 		return -ENOMEM;
 	}
@@ -693,6 +706,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	if (!mz) {
 		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for RX");
 		rte_free(rxq->sw_ring);
+		rte_free(rxq->stats);
 		rte_free(rxq);
 		return -ENOMEM;
 	}
@@ -1054,6 +1068,7 @@ iavf_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 	iavf_rxq_release_mbufs_ops[q->rel_mbufs_type].release_mbufs(q);
 	rte_free(q->sw_ring);
 	rte_memzone_free(q->mz);
+	rte_free(q->stats);
 	rte_free(q);
 }
 
@@ -1581,7 +1596,7 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
 		iavf_flex_rxd_to_vlan_tci(rxm, &rxd);
 		iavf_flex_rxd_to_ipsec_crypto_status(rxm, &rxd,
-				&rxq->stats.ipsec_crypto);
+				&rxq->stats->ipsec_crypto);
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, rxm, &rxd);
 		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
@@ -1750,7 +1765,7 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
 		iavf_flex_rxd_to_vlan_tci(first_seg, &rxd);
 		iavf_flex_rxd_to_ipsec_crypto_status(first_seg, &rxd,
-				&rxq->stats.ipsec_crypto);
+				&rxq->stats->ipsec_crypto);
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, first_seg, &rxd);
 		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
@@ -2034,7 +2049,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 				rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
 			iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]);
 			iavf_flex_rxd_to_ipsec_crypto_status(mb, &rxdp[j],
-				&rxq->stats.ipsec_crypto);
+				&rxq->stats->ipsec_crypto);
 			rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);
 			stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
 			pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index 0b5d67e718..62b5a67c84 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -268,7 +268,7 @@ struct iavf_rx_queue {
 	uint8_t proto_xtr; /* protocol extraction type */
 	uint64_t xtr_ol_flag;
 		/* flexible descriptor metadata extraction offload flag */
-	struct iavf_rx_queue_stats stats;
+	struct iavf_rx_queue_stats *stats;
 	uint64_t offloads;
 	uint64_t phc_time;
 	uint64_t hw_time_update;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v3 03/13] net/ixgbe: create common Rx queue structure
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
  2025-05-12 12:54   ` [PATCH v3 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
@ 2025-05-12 12:54   ` Anatoly Burakov
  2025-05-14 16:45     ` Bruce Richardson
  2025-05-12 12:54   ` [PATCH v3 04/13] net/i40e: use the " Anatoly Burakov
                     ` (11 subsequent siblings)
  13 siblings, 1 reply; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 12:54 UTC (permalink / raw)
  To: dev, Bruce Richardson, Vladimir Medvedkin

In preparation for the deduplication effort, generalize the Rx queue structure.

Most of the fields are simply moved to common/rx.h, with comments clarified
where necessary. Some fields are renamed when moved, to make them more
consistent with the rest of the codebase.

Specifically, the following fields are renamed:

- rdt_reg_addr -> qrx_tail (Rx ring tail register address)
- rx_using_sse -> vector_rx (indicates if vectorized path is enabled)
- mb_pool -> mp (other drivers use this name)

Additionally, some per-driver defines are also moved to the aforementioned
common Rx header and re-defined in the driver in terms of the common values.
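
As an illustration of the resulting access pattern (the helper name below is
hypothetical; the types and the union member are as introduced by this patch):
driver code now reaches its own descriptor ring through the per-driver union
member of struct ci_rx_queue, e.g. for ixgbe:

  static inline volatile union ixgbe_adv_rx_desc *
  ixgbe_rx_ring_desc(struct ci_rx_queue *rxq, uint16_t idx)
  {
  	/* ixgbe_rx_ring is the ixgbe member of the ring-address union */
  	return &rxq->ixgbe_rx_ring[idx];
  }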

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h                 |  62 ++++++++
 drivers/net/intel/ixgbe/ixgbe_ethdev.c        |   8 +-
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    |   8 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx.c          | 149 +++++++++---------
 drivers/net/intel/ixgbe/ixgbe_rxtx.h          |  67 +-------
 .../net/intel/ixgbe/ixgbe_rxtx_vec_common.h   |   4 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c |  22 +--
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c  |  22 +--
 8 files changed, 172 insertions(+), 170 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index abb01ba5e7..524de39f9c 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -11,6 +11,68 @@
 #include <rte_ethdev.h>
 
 #define CI_RX_BURST 32
+#define CI_RX_MAX_BURST 32
+
+struct ci_rx_queue;
+
+struct ci_rx_entry {
+	struct rte_mbuf *mbuf; /* mbuf associated with RX descriptor. */
+};
+
+struct ci_rx_entry_sc {
+	struct rte_mbuf *fbuf; /* First segment of the fragmented packet.*/
+};
+
+/**
+ * Structure associated with each RX queue.
+ */
+struct ci_rx_queue {
+	struct rte_mempool  *mp; /**< mbuf pool to populate RX ring. */
+	union { /* RX ring virtual address */
+		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
+	};
+	volatile uint8_t *qrx_tail;   /**< register address of tail */
+	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
+	struct ci_rx_entry_sc *sw_sc_ring; /**< address of scattered Rx software ring. */
+	rte_iova_t rx_ring_phys_addr; /**< RX ring DMA address. */
+	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
+	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
+	/** hold packets to return to application */
+	struct rte_mbuf *rx_stage[CI_RX_MAX_BURST * 2];
+	uint16_t nb_rx_desc; /**< number of RX descriptors. */
+	uint16_t rx_tail;  /**< current value of tail register. */
+	uint16_t rx_nb_avail; /**< nr of staged pkts ready to ret to app */
+	uint16_t nb_rx_hold; /**< number of held free RX desc. */
+	uint16_t rx_next_avail; /**< idx of next staged pkt to ret to app */
+	uint16_t rx_free_thresh; /**< max free RX desc to hold. */
+	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
+	uint16_t rxrearm_nb;     /**< number of remaining to be re-armed */
+	uint16_t rxrearm_start;  /**< the idx we start the re-arming from */
+	uint16_t queue_id; /**< RX queue index. */
+	uint16_t port_id;  /**< Device port identifier. */
+	uint16_t reg_idx;  /**< RX queue register index. */
+	uint8_t crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
+	bool rx_deferred_start; /**< queue is not started on dev start. */
+	bool vector_rx; /**< indicates that vector RX is in use */
+	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
+	uint64_t mbuf_initializer; /**< value to init mbufs */
+	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
+	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
+	struct rte_mbuf fake_mbuf;
+	const struct rte_memzone *mz;
+	union {
+		struct { /* ixgbe specific values */
+			/** indicates that IPsec RX feature is in use */
+			uint8_t using_ipsec;
+			/** Packet type mask for different NICs. */
+			uint16_t pkt_type_mask;
+			/** UDP frames with a 0 checksum can be marked as checksum errors. */
+			uint8_t rx_udp_csum_zero_err;
+			/** flags to set in mbuf when a vlan is detected. */
+			uint64_t vlan_flags;
+		};
+	};
+};
 
 static inline uint16_t
 ci_rx_reassemble_packets(struct rte_mbuf **rx_bufs, uint16_t nb_bufs, uint8_t *split_flags,
diff --git a/drivers/net/intel/ixgbe/ixgbe_ethdev.c b/drivers/net/intel/ixgbe/ixgbe_ethdev.c
index f1fd271a0a..df1eecc3c1 100644
--- a/drivers/net/intel/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/intel/ixgbe/ixgbe_ethdev.c
@@ -2022,7 +2022,7 @@ ixgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev, uint16_t queue, bool on)
 {
 	struct ixgbe_hwstrip *hwstrip =
 		IXGBE_DEV_PRIVATE_TO_HWSTRIP_BITMAP(dev->data->dev_private);
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	if (queue >= IXGBE_MAX_RX_QUEUE_NUM)
 		return;
@@ -2157,7 +2157,7 @@ ixgbe_vlan_hw_strip_config(struct rte_eth_dev *dev)
 	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
 	uint32_t ctrl;
 	uint16_t i;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	bool on;
 
 	PMD_INIT_FUNC_TRACE();
@@ -2200,7 +2200,7 @@ ixgbe_config_vlan_strip_on_all_queues(struct rte_eth_dev *dev, int mask)
 {
 	uint16_t i;
 	struct rte_eth_rxmode *rxmode;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	if (mask & RTE_ETH_VLAN_STRIP_MASK) {
 		rxmode = &dev->data->dev_conf.rxmode;
@@ -5789,7 +5789,7 @@ ixgbevf_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on)
 static int
 ixgbevf_vlan_offload_config(struct rte_eth_dev *dev, int mask)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t i;
 	int on = 0;
 
diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index c1b086ef6d..1df1787c7f 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -11,15 +11,15 @@
 void
 ixgbe_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
-	struct ixgbe_rx_entry *rxep;
+	struct ci_rx_queue *rxq = rx_queue;
+	struct ci_rx_entry *rxep;
 	volatile union ixgbe_adv_rx_desc *rxdp;
 	uint16_t rx_id;
 	uint64_t paddr;
 	uint64_t dma_addr;
 	uint16_t i;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
 	rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
 	for (i = 0; i < nb_mbufs; i++) {
@@ -42,7 +42,7 @@ ixgbe_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 			(rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
+	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
 }
 
 uint16_t
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index 0c07ce3186..4e4afd81e4 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -1423,11 +1423,11 @@ int
 ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = &rxq->ixgbe_rx_ring[desc];
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.upper.status_error;
 
@@ -1567,10 +1567,10 @@ rx_desc_error_to_pkt_flags(uint32_t rx_status, uint16_t pkt_info,
 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
 #endif
 static inline int
-ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_scan_hw_ring(struct ci_rx_queue *rxq)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t pkt_len;
 	uint64_t pkt_flags;
@@ -1582,7 +1582,7 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
 	uint64_t vlan_flags = rxq->vlan_flags;
 
 	/* get references to current descriptor and S/W ring entry */
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = &rxq->ixgbe_rx_ring[rxq->rx_tail];
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	status = rxdp->wb.upper.status_error;
@@ -1667,10 +1667,10 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
 }
 
 static inline int
-ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
+ixgbe_rx_alloc_bufs(struct ci_rx_queue *rxq, bool reset_mbuf)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx;
 	__le64 dma_addr;
@@ -1679,12 +1679,12 @@ ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
 	/* allocate buffers in bulk directly into the S/W ring */
 	alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
 	rxep = &rxq->sw_ring[alloc_idx];
-	diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
+	diag = rte_mempool_get_bulk(rxq->mp, (void *)rxep,
 				    rxq->rx_free_thresh);
 	if (unlikely(diag != 0))
 		return -ENOMEM;
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = &rxq->ixgbe_rx_ring[alloc_idx];
 	for (i = 0; i < rxq->rx_free_thresh; ++i) {
 		/* populate the static rte mbuf fields */
 		mb = rxep[i].mbuf;
@@ -1711,7 +1711,7 @@ ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
 }
 
 static inline uint16_t
-ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+ixgbe_rx_fill_from_stage(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			 uint16_t nb_pkts)
 {
 	struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
@@ -1735,7 +1735,7 @@ static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	     uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	uint16_t nb_rx = 0;
 
 	/* Any previously recv'd pkts will be returned from the Rx stage */
@@ -1778,8 +1778,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 		/* update tail pointer */
 		rte_wmb();
-		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
-					    cur_free_trigger);
+		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->qrx_tail, cur_free_trigger);
 	}
 
 	if (rxq->rx_tail >= rxq->nb_rx_desc)
@@ -1825,11 +1824,11 @@ uint16_t
 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	volatile union ixgbe_adv_rx_desc *rx_ring;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *sw_ring;
-	struct ixgbe_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
 	union ixgbe_adv_rx_desc rxd;
@@ -1847,7 +1846,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
+	rx_ring = rxq->ixgbe_rx_ring;
 	sw_ring = rxq->sw_ring;
 	vlan_flags = rxq->vlan_flags;
 	while (nb_rx < nb_pkts) {
@@ -1908,7 +1907,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   (unsigned) rx_id, (unsigned) staterr,
 			   (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
 
-		nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (nmb == NULL) {
 			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
 				   "queue_id=%u", (unsigned) rxq->port_id,
@@ -2017,7 +2016,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   (unsigned) nb_rx);
 		rx_id = (uint16_t) ((rx_id == 0) ?
 				     (rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
+		IXGBE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 		nb_hold = 0;
 	}
 	rxq->nb_rx_hold = nb_hold;
@@ -2052,7 +2051,7 @@ static inline void
 ixgbe_fill_cluster_head_buf(
 	struct rte_mbuf *head,
 	union ixgbe_adv_rx_desc *desc,
-	struct ixgbe_rx_queue *rxq,
+	struct ci_rx_queue *rxq,
 	uint32_t staterr)
 {
 	uint32_t pkt_info;
@@ -2114,10 +2113,10 @@ static inline uint16_t
 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 		    bool bulk_alloc)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
-	volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
-	struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
-	struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ixgbe_adv_rx_desc *rx_ring = rxq->ixgbe_rx_ring;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry_sc *sw_sc_ring = rxq->sw_sc_ring;
 	uint16_t rx_id = rxq->rx_tail;
 	uint16_t nb_rx = 0;
 	uint16_t nb_hold = rxq->nb_rx_hold;
@@ -2125,10 +2124,10 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 
 	while (nb_rx < nb_pkts) {
 		bool eop;
-		struct ixgbe_rx_entry *rxe;
-		struct ixgbe_scattered_rx_entry *sc_entry;
-		struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
-		struct ixgbe_rx_entry *next_rxe = NULL;
+		struct ci_rx_entry *rxe;
+		struct ci_rx_entry_sc *sc_entry;
+		struct ci_rx_entry_sc *next_sc_entry = NULL;
+		struct ci_rx_entry *next_rxe = NULL;
 		struct rte_mbuf *first_seg;
 		struct rte_mbuf *rxm;
 		struct rte_mbuf *nmb = NULL;
@@ -2165,7 +2164,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 			   rte_le_to_cpu_16(rxd.wb.upper.length));
 
 		if (!bulk_alloc) {
-			nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
+			nmb = rte_mbuf_raw_alloc(rxq->mp);
 			if (nmb == NULL) {
 				PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
 						  "port_id=%u queue_id=%u",
@@ -2181,7 +2180,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 			if (!ixgbe_rx_alloc_bufs(rxq, false)) {
 				rte_wmb();
 				IXGBE_PCI_REG_WC_WRITE_RELAXED(
-							rxq->rdt_reg_addr,
+							rxq->qrx_tail,
 							next_rdt);
 				nb_hold -= rxq->rx_free_thresh;
 			} else {
@@ -2347,7 +2346,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 			   rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
 
 		rte_wmb();
-		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
+		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->qrx_tail, prev_id);
 		nb_hold = 0;
 	}
 
@@ -2969,12 +2968,12 @@ ixgbe_free_sc_cluster(struct rte_mbuf *m)
 }
 
 static void __rte_cold
-ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release_mbufs(struct ci_rx_queue *rxq)
 {
 	unsigned i;
 
 	/* SSE Vector driver has a different way of releasing mbufs. */
-	if (rxq->rx_using_sse) {
+	if (rxq->vector_rx) {
 		ixgbe_rx_queue_release_mbufs_vec(rxq);
 		return;
 	}
@@ -3006,7 +3005,7 @@ ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
 }
 
 static void __rte_cold
-ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release(struct ci_rx_queue *rxq)
 {
 	if (rxq != NULL) {
 		ixgbe_rx_queue_release_mbufs(rxq);
@@ -3032,7 +3031,7 @@ ixgbe_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
  *           function must be used.
  */
 static inline int __rte_cold
-check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
+check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
 {
 	int ret = 0;
 
@@ -3069,7 +3068,7 @@ check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
 
 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
 static void __rte_cold
-ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
+ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ci_rx_queue *rxq)
 {
 	static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
 	unsigned i;
@@ -3090,7 +3089,7 @@ ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
 	 * reads extra memory as zeros.
 	 */
 	for (i = 0; i < len; i++) {
-		rxq->rx_ring[i] = zeroed_desc;
+		rxq->ixgbe_rx_ring[i] = zeroed_desc;
 	}
 
 	/*
@@ -3205,7 +3204,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 			 struct rte_mempool *mp)
 {
 	const struct rte_memzone *rz;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ixgbe_hw     *hw;
 	uint16_t len;
 	struct ixgbe_adapter *adapter = dev->data->dev_private;
@@ -3234,11 +3233,11 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	}
 
 	/* First allocate the rx queue data structure */
-	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
+	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE, socket_id);
 	if (rxq == NULL)
 		return -ENOMEM;
-	rxq->mb_pool = mp;
+	rxq->mp = mp;
 	rxq->nb_rx_desc = nb_desc;
 	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
 	rxq->queue_id = queue_idx;
@@ -3297,14 +3296,14 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	 * Modified to setup VFRDT for Virtual Function
 	 */
 	if (ixgbe_is_vf(dev))
-		rxq->rdt_reg_addr =
+		rxq->qrx_tail =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
 	else
-		rxq->rdt_reg_addr =
+		rxq->qrx_tail =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
 
 	rxq->rx_ring_phys_addr = rz->iova;
-	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
+	rxq->ixgbe_rx_ring = (union ixgbe_adv_rx_desc *)rz->addr;
 
 	/*
 	 * Certain constraints must be met in order to use the bulk buffer
@@ -3329,7 +3328,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 		len += RTE_PMD_IXGBE_RX_MAX_BURST;
 
 	rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
-					  sizeof(struct ixgbe_rx_entry) * len,
+					  sizeof(struct ci_rx_entry) * len,
 					  RTE_CACHE_LINE_SIZE, socket_id);
 	if (!rxq->sw_ring) {
 		ixgbe_rx_queue_release(rxq);
@@ -3346,7 +3345,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	 */
 	rxq->sw_sc_ring =
 		rte_zmalloc_socket("rxq->sw_sc_ring",
-				   sizeof(struct ixgbe_scattered_rx_entry) * len,
+				   sizeof(struct ci_rx_entry_sc) * len,
 				   RTE_CACHE_LINE_SIZE, socket_id);
 	if (!rxq->sw_sc_ring) {
 		ixgbe_rx_queue_release(rxq);
@@ -3355,7 +3354,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 
 	PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
 			    "dma_addr=0x%"PRIx64,
-		     rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
+		     rxq->sw_ring, rxq->sw_sc_ring, rxq->ixgbe_rx_ring,
 		     rxq->rx_ring_phys_addr);
 
 	if (!rte_is_power_of_2(nb_desc)) {
@@ -3379,11 +3378,11 @@ ixgbe_dev_rx_queue_count(void *rx_queue)
 {
 #define IXGBE_RXQ_SCAN_INTERVAL 4
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &(rxq->rx_ring[rxq->rx_tail]);
+	rxdp = &rxq->ixgbe_rx_ring[rxq->rx_tail];
 
 	while ((desc < rxq->nb_rx_desc) &&
 		(rxdp->wb.upper.status_error &
@@ -3391,7 +3390,7 @@ ixgbe_dev_rx_queue_count(void *rx_queue)
 		desc += IXGBE_RXQ_SCAN_INTERVAL;
 		rxdp += IXGBE_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
+			rxdp = &(rxq->ixgbe_rx_ring[rxq->rx_tail +
 				desc - rxq->nb_rx_desc]);
 	}
 
@@ -3401,7 +3400,7 @@ ixgbe_dev_rx_queue_count(void *rx_queue)
 int
 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile uint32_t *status;
 	uint32_t nb_hold, desc;
 
@@ -3409,7 +3408,7 @@ ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 		return -EINVAL;
 
 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
-	if (rxq->rx_using_sse)
+	if (rxq->vector_rx)
 		nb_hold = rxq->rxrearm_nb;
 	else
 #endif
@@ -3421,7 +3420,7 @@ ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	status = &rxq->rx_ring[desc].wb.upper.status_error;
+	status = &rxq->ixgbe_rx_ring[desc].wb.upper.status_error;
 	if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
 		return RTE_ETH_RX_DESC_DONE;
 
@@ -3506,7 +3505,7 @@ ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
 	}
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 
 		if (rxq != NULL) {
 			ixgbe_rx_queue_release_mbufs(rxq);
@@ -4668,16 +4667,16 @@ ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
 }
 
 static int __rte_cold
-ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
+ixgbe_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq)
 {
-	struct ixgbe_rx_entry *rxe = rxq->sw_ring;
+	struct ci_rx_entry *rxe = rxq->sw_ring;
 	uint64_t dma_addr;
 	unsigned int i;
 
 	/* Initialize software ring entries */
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
 		volatile union ixgbe_adv_rx_desc *rxd;
-		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
+		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mp);
 
 		if (mbuf == NULL) {
 			PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
@@ -4690,7 +4689,7 @@ ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
 
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
-		rxd = &rxq->rx_ring[i];
+		rxd = &rxq->ixgbe_rx_ring[i];
 		rxd->read.hdr_addr = 0;
 		rxd->read.pkt_addr = dma_addr;
 		rxe[i].mbuf = mbuf;
@@ -5109,9 +5108,9 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 		dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 
-		rxq->rx_using_sse = rx_using_sse;
+		rxq->vector_rx = rx_using_sse;
 #ifdef RTE_LIB_SECURITY
 		rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
 				RTE_ETH_RX_OFFLOAD_SECURITY);
@@ -5187,7 +5186,7 @@ ixgbe_set_rsc(struct rte_eth_dev *dev)
 
 	/* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		uint32_t srrctl =
 			IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
 		uint32_t rscctl =
@@ -5217,7 +5216,7 @@ ixgbe_set_rsc(struct rte_eth_dev *dev)
 		 */
 
 		rscctl |= IXGBE_RSCCTL_RSCEN;
-		rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
+		rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mp);
 		psrtype |= IXGBE_PSRTYPE_TCPHDR;
 
 		/*
@@ -5263,7 +5262,7 @@ int __rte_cold
 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint64_t bus_addr;
 	uint32_t rxctrl;
 	uint32_t fctrl;
@@ -5374,7 +5373,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
 		 * The value is in 1 KB resolution. Valid values can be from
 		 * 1 KB to 16 KB.
 		 */
-		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
+		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
 			RTE_PKTMBUF_HEADROOM);
 		srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
 			   IXGBE_SRRCTL_BSIZEPKT_MASK);
@@ -5559,7 +5558,7 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
 	struct ci_tx_queue *txq;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t txdctl;
 	uint32_t dmatxctl;
 	uint32_t rxctrl;
@@ -5646,7 +5645,7 @@ int __rte_cold
 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ixgbe_hw     *hw;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t rxdctl;
 	int poll_ms;
 
@@ -5689,7 +5688,7 @@ ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ixgbe_hw     *hw;
 	struct ixgbe_adapter *adapter = dev->data->dev_private;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t rxdctl;
 	int poll_ms;
 
@@ -5823,11 +5822,11 @@ void
 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
-	qinfo->mp = rxq->mb_pool;
+	qinfo->mp = rxq->mp;
 	qinfo->scattered_rx = dev->data->scattered_rx;
 	qinfo->nb_desc = rxq->nb_rx_desc;
 
@@ -5861,13 +5860,13 @@ void
 ixgbe_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ixgbe_adapter *adapter = dev->data->dev_private;
 
 	rxq = dev->data->rx_queues[queue_id];
 
 	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
-	recycle_rxq_info->mp = rxq->mb_pool;
+	recycle_rxq_info->mp = rxq->mp;
 	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
 	recycle_rxq_info->receive_tail = &rxq->rx_tail;
 
@@ -5889,7 +5888,7 @@ int __rte_cold
 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
 	uint32_t frame_size = dev->data->mtu + IXGBE_ETH_OVERHEAD;
 	uint64_t bus_addr;
@@ -5972,7 +5971,7 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		 * The value is in 1 KB resolution. Valid values can be from
 		 * 1 KB to 16 KB.
 		 */
-		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
+		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
 			RTE_PKTMBUF_HEADROOM);
 		srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
 			   IXGBE_SRRCTL_BSIZEPKT_MASK);
@@ -6076,7 +6075,7 @@ ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
 	struct ci_tx_queue *txq;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t txdctl;
 	uint32_t rxdctl;
 	uint16_t i;
@@ -6270,7 +6269,7 @@ ixgbe_recv_scattered_pkts_vec(
 }
 
 int
-ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
+ixgbe_rxq_vec_setup(struct ci_rx_queue __rte_unused * rxq)
 {
 	return -1;
 }
@@ -6290,7 +6289,7 @@ ixgbe_txq_vec_setup(struct ci_tx_queue *txq __rte_unused)
 }
 
 void
-ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
+ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue __rte_unused * rxq)
 {
 	return;
 }
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index 20a5c5a0af..84e28eb254 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -5,6 +5,7 @@
 #ifndef _IXGBE_RXTX_H_
 #define _IXGBE_RXTX_H_
 
+#include "../common/rx.h"
 #include "../common/tx.h"
 
 /*
@@ -30,7 +31,7 @@
 #define	IXGBE_MAX_RING_DESC	8192
 
 #define RTE_PMD_IXGBE_TX_MAX_BURST 32
-#define RTE_PMD_IXGBE_RX_MAX_BURST 32
+#define RTE_PMD_IXGBE_RX_MAX_BURST CI_RX_MAX_BURST
 #define RTE_IXGBE_TX_MAX_FREE_BUF_SZ 64
 
 #define RTE_IXGBE_DESCS_PER_LOOP    4
@@ -66,66 +67,6 @@
 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
 #define IXGBE_PACKET_TYPE_SHIFT             0X04
 
-/**
- * Structure associated with each descriptor of the RX ring of a RX queue.
- */
-struct ixgbe_rx_entry {
-	struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
-};
-
-struct ixgbe_scattered_rx_entry {
-	struct rte_mbuf *fbuf; /**< First segment of the fragmented packet. */
-};
-
-/**
- * Structure associated with each RX queue.
- */
-struct ixgbe_rx_queue {
-	struct rte_mempool  *mb_pool; /**< mbuf pool to populate RX ring. */
-	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
-	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
-	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
-	struct ixgbe_rx_entry *sw_ring; /**< address of RX software ring. */
-	struct ixgbe_scattered_rx_entry *sw_sc_ring; /**< address of scattered Rx software ring. */
-	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
-	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
-	uint64_t            mbuf_initializer; /**< value to init mbufs */
-	uint16_t            nb_rx_desc; /**< number of RX descriptors. */
-	uint16_t            rx_tail;  /**< current value of RDT register. */
-	uint16_t            nb_rx_hold; /**< number of held free RX desc. */
-	uint16_t rx_nb_avail; /**< nr of staged pkts ready to ret to app */
-	uint16_t rx_next_avail; /**< idx of next staged pkt to ret to app */
-	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	uint8_t            rx_using_sse;
-	/**< indicates that vector RX is in use */
-#ifdef RTE_LIB_SECURITY
-	uint8_t            using_ipsec;
-	/**< indicates that IPsec RX feature is in use */
-#endif
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
-	uint16_t            rxrearm_nb;     /**< number of remaining to be re-armed */
-	uint16_t            rxrearm_start;  /**< the idx we start the re-arming from */
-#endif
-	uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
-	uint16_t            queue_id; /**< RX queue index. */
-	uint16_t            reg_idx;  /**< RX queue register index. */
-	uint16_t            pkt_type_mask;  /**< Packet type mask for different NICs. */
-	uint16_t            port_id;  /**< Device port identifier. */
-	uint8_t             crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
-	uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
-	uint8_t             rx_deferred_start; /**< not in global dev start. */
-	/** UDP frames with a 0 checksum can be marked as checksum errors. */
-	uint8_t             rx_udp_csum_zero_err;
-	/** flags to set in mbuf when a vlan is detected. */
-	uint64_t            vlan_flags;
-	uint64_t	    offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
-	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
-	struct rte_mbuf fake_mbuf;
-	/** hold packets to return to application */
-	struct rte_mbuf *rx_stage[RTE_PMD_IXGBE_RX_MAX_BURST*2];
-	const struct rte_memzone *mz;
-};
-
 /**
  * IXGBE CTX Constants
  */
@@ -230,8 +171,8 @@ uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 uint16_t ixgbe_recv_scattered_pkts_vec(void *rx_queue,
 		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
 int ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);
-int ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq);
-void ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq);
+int ixgbe_rxq_vec_setup(struct ci_rx_queue *rxq);
+void ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq);
 int ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt);
 
 extern const uint32_t ptype_table[IXGBE_PACKET_TYPE_MAX];
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
index 018010820f..0ba3d7a4c0 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
@@ -69,7 +69,7 @@ ixgbe_tx_free_bufs(struct ci_tx_queue *txq)
 }
 
 static inline void
-_ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
+_ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	unsigned int i;
 
@@ -173,7 +173,7 @@ ixgbe_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
 		return -1;
 
 	for (uint16_t i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		if (!rxq)
 			continue;
 		if (!ci_rxq_vec_capable(rxq->nb_rx_desc, rxq->rx_free_thresh, rxq->offloads))
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
index 9ccd8eba25..630a2e6a1d 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -12,22 +12,22 @@
 #include "ixgbe_rxtx_vec_common.h"
 
 static inline void
-ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	uint64x2_t dma_addr0, dma_addr1;
 	uint64x2_t zero = vdupq_n_u64(0);
 	uint64_t paddr;
 	uint8x8_t p;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mb_pool,
+	if (unlikely(rte_mempool_get_bulk(rxq->mp,
 					  (void *)rxep,
 					  RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
 		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
@@ -76,7 +76,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
+	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
 }
 
 static inline void
@@ -282,11 +282,11 @@ desc_to_ptype_v(uint64x2_t descs[4], uint16_t pkt_type_mask,
  * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint8x16_t shuf_msk = {
@@ -309,7 +309,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rx_tail;
 
 	rte_prefetch_non_temporal(rxdp);
 
@@ -488,7 +488,7 @@ static uint16_t
 ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -634,7 +634,7 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_ixgbe_rx_queue_release_mbufs_vec(rxq);
 }
@@ -657,7 +657,7 @@ static const struct ixgbe_txq_ops vec_txq_ops = {
 };
 
 int __rte_cold
-ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
index e125f52cc5..ecfb0d6ba6 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -13,12 +13,12 @@
 #include <rte_vect.h>
 
 static inline void
-ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
@@ -26,10 +26,10 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 
 	const __m128i hba_msk = _mm_set_epi64x(0, UINT64_MAX);
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mb_pool,
+	if (rte_mempool_get_bulk(rxq->mp,
 				 (void *)rxep,
 				 RTE_IXGBE_RXQ_REARM_THRESH) < 0) {
 		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
@@ -86,7 +86,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
+	IXGBE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 }
 
 #ifdef RTE_LIB_SECURITY
@@ -327,11 +327,11 @@ desc_to_ptype_v(__m128i descs[4], uint16_t pkt_type_mask,
  * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 #ifdef RTE_LIB_SECURITY
 	uint8_t use_ipsec = rxq->using_ipsec;
@@ -377,7 +377,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rx_tail;
 
 	rte_prefetch0(rxdp);
 
@@ -609,7 +609,7 @@ static uint16_t
 ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -755,7 +755,7 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_ixgbe_rx_queue_release_mbufs_vec(rxq);
 }
@@ -778,7 +778,7 @@ static const struct ixgbe_txq_ops vec_txq_ops = {
 };
 
 int __rte_cold
-ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v3 04/13] net/i40e: use the common Rx queue structure
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
  2025-05-12 12:54   ` [PATCH v3 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
  2025-05-12 12:54   ` [PATCH v3 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
@ 2025-05-12 12:54   ` Anatoly Burakov
  2025-05-14 16:52     ` Bruce Richardson
  2025-05-12 12:54   ` [PATCH v3 05/13] net/ice: " Anatoly Burakov
                     ` (10 subsequent siblings)
  13 siblings, 1 reply; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 12:54 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes

Make the i40e driver use the new common Rx queue structure.

Because the i40e driver supports both 16-byte and 32-byte descriptor
formats (controlled by the RTE_LIBRTE_I40E_16BYTE_RX_DESC define), the
common queue structure has to take that into account, so the ring union
holds both descriptor types and the actual format is selected by i40e at
compile time using the above macro. Direct accesses to the Rx ring are
now replaced with macro accessors that take the descriptor size into
account.
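
For readers skimming the diff, the moving parts (consolidated here from the
common/rx.h and i40e_rxtx.h hunks below; the descriptor unions themselves
already exist in the base code) amount to roughly the following sketch:

    /* common queue structure: the ring union holds both layouts */
    struct ci_rx_queue {
        union { /* RX ring virtual address */
            volatile union i40e_16byte_rx_desc *i40e_rx_16b_ring;
            volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
            /* ... ring pointers of the other drivers ... */
        };
        /* ... */
    };

    /* i40e selects the active layout at compile time */
    #ifdef RTE_LIBRTE_I40E_16BYTE_RX_DESC
    #define I40E_RX_RING(rxq)       ((rxq)->i40e_rx_16b_ring)
    #else
    #define I40E_RX_RING(rxq)       ((rxq)->i40e_rx_32b_ring)
    #endif
    #define I40E_RX_RING_PTR(rxq, entry)    (I40E_RX_RING(rxq) + (entry))

    /* so a former direct access like &rxq->rx_ring[rx_tail] becomes: */
    rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);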

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h                 |  14 ++
 drivers/net/intel/i40e/i40e_ethdev.c          |   4 +-
 drivers/net/intel/i40e/i40e_ethdev.h          |   4 +-
 drivers/net/intel/i40e/i40e_fdir.c            |  16 +--
 .../i40e/i40e_recycle_mbufs_vec_common.c      |   6 +-
 drivers/net/intel/i40e/i40e_rxtx.c            | 126 +++++++++---------
 drivers/net/intel/i40e/i40e_rxtx.h            |  74 +++-------
 drivers/net/intel/i40e/i40e_rxtx_common_avx.h |   6 +-
 .../net/intel/i40e/i40e_rxtx_vec_altivec.c    |  22 +--
 drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |  12 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |  12 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_common.h |   4 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   |  24 ++--
 drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    |  24 ++--
 14 files changed, 160 insertions(+), 188 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index 524de39f9c..db49db57d0 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -30,6 +30,8 @@ struct ci_rx_queue {
 	struct rte_mempool  *mp; /**< mbuf pool to populate RX ring. */
 	union { /* RX ring virtual address */
 		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
+		volatile union i40e_16byte_rx_desc *i40e_rx_16b_ring;
+		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
@@ -51,14 +53,22 @@ struct ci_rx_queue {
 	uint16_t queue_id; /**< RX queue index. */
 	uint16_t port_id;  /**< Device port identifier. */
 	uint16_t reg_idx;  /**< RX queue register index. */
+	uint16_t rx_buf_len; /* The packet buffer size */
+	uint16_t rx_hdr_len; /* The header buffer size */
+	uint16_t max_pkt_len; /* Maximum packet length */
 	uint8_t crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
+	bool q_set; /**< indicate if rx queue has been configured */
 	bool rx_deferred_start; /**< queue is not started on dev start. */
+	bool fdir_enabled; /* 0 if FDIR disabled, 1 when enabled */
 	bool vector_rx; /**< indicates that vector RX is in use */
 	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
 	uint64_t mbuf_initializer; /**< value to init mbufs */
 	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
 	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
 	struct rte_mbuf fake_mbuf;
+	union { /* the VSI this queue belongs to */
+		struct i40e_vsi *i40e_vsi;
+	};
 	const struct rte_memzone *mz;
 	union {
 		struct { /* ixgbe specific values */
@@ -71,6 +81,10 @@ struct ci_rx_queue {
 			/** flags to set in mbuf when a vlan is detected. */
 			uint64_t vlan_flags;
 		};
+		struct { /* i40e specific values */
+			uint8_t hs_mode; /**< Header Split mode */
+			uint8_t dcb_tc; /**< Traffic class of rx queue */
+		};
 	};
 };
 
diff --git a/drivers/net/intel/i40e/i40e_ethdev.c b/drivers/net/intel/i40e/i40e_ethdev.c
index 90eba3419f..e0a865845b 100644
--- a/drivers/net/intel/i40e/i40e_ethdev.c
+++ b/drivers/net/intel/i40e/i40e_ethdev.c
@@ -6609,7 +6609,7 @@ i40e_dev_rx_init(struct i40e_pf *pf)
 	struct rte_eth_dev_data *data = pf->dev_data;
 	int ret = I40E_SUCCESS;
 	uint16_t i;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	i40e_pf_config_rss(pf);
 	for (i = 0; i < data->nb_rx_queues; i++) {
@@ -8974,7 +8974,7 @@ i40e_pf_calc_configured_queues_num(struct i40e_pf *pf)
 {
 	struct rte_eth_dev_data *data = pf->dev_data;
 	int i, num;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	num = 0;
 	for (i = 0; i < pf->lan_nb_qps; i++) {
diff --git a/drivers/net/intel/i40e/i40e_ethdev.h b/drivers/net/intel/i40e/i40e_ethdev.h
index ccc8732d7d..44864292d0 100644
--- a/drivers/net/intel/i40e/i40e_ethdev.h
+++ b/drivers/net/intel/i40e/i40e_ethdev.h
@@ -333,7 +333,7 @@ struct i40e_vsi_list {
 	struct i40e_vsi *vsi;
 };
 
-struct i40e_rx_queue;
+struct ci_rx_queue;
 struct ci_tx_queue;
 
 /* Bandwidth limit information */
@@ -739,7 +739,7 @@ struct i40e_fdir_info {
 	struct i40e_vsi *fdir_vsi;     /* pointer to fdir VSI structure */
 	uint16_t match_counter_index;  /* Statistic counter index used for fdir*/
 	struct ci_tx_queue *txq;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	void *prg_pkt[I40E_FDIR_PRG_PKT_CNT];     /* memory for fdir program packet */
 	uint64_t dma_addr[I40E_FDIR_PRG_PKT_CNT]; /* physic address of packet memory*/
 	/*
diff --git a/drivers/net/intel/i40e/i40e_fdir.c b/drivers/net/intel/i40e/i40e_fdir.c
index 94e3ab44e3..eadcf63d1d 100644
--- a/drivers/net/intel/i40e/i40e_fdir.c
+++ b/drivers/net/intel/i40e/i40e_fdir.c
@@ -100,9 +100,9 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 				  bool add, bool wait_status);
 
 static int
-i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
+i40e_fdir_rx_queue_init(struct ci_rx_queue *rxq)
 {
-	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
+	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->i40e_vsi);
 	struct i40e_hmc_obj_rxq rx_ctx;
 	int err = I40E_SUCCESS;
 
@@ -139,7 +139,7 @@ i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
 		return err;
 	}
 	rxq->qrx_tail = hw->hw_addr +
-		I40E_QRX_TAIL(rxq->vsi->base_queue);
+		I40E_QRX_TAIL(rxq->i40e_vsi->base_queue);
 
 	rte_wmb();
 	/* Init the RX tail register. */
@@ -382,7 +382,7 @@ i40e_fdir_rx_proc_enable(struct rte_eth_dev *dev, bool on)
 	int32_t i;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		if (!rxq)
 			continue;
 		rxq->fdir_enabled = on;
@@ -929,7 +929,7 @@ i40e_build_ctob(uint32_t td_cmd,
  * tx queue
  */
 static inline int
-i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
+i40e_check_fdir_programming_status(struct ci_rx_queue *rxq)
 {
 	volatile union i40e_rx_desc *rxdp;
 	uint64_t qword1;
@@ -938,7 +938,7 @@ i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
 	uint32_t error;
 	int ret = 0;
 
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
 	rx_status = (qword1 & I40E_RXD_QW1_STATUS_MASK)
 			>> I40E_RXD_QW1_STATUS_SHIFT;
@@ -987,7 +987,7 @@ i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
 }
 
 static inline void
-i40e_fdir_programming_status_cleanup(struct i40e_rx_queue *rxq)
+i40e_fdir_programming_status_cleanup(struct ci_rx_queue *rxq)
 {
 	uint16_t retry_count = 0;
 
@@ -1627,7 +1627,7 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 				  bool add, bool wait_status)
 {
 	struct ci_tx_queue *txq = pf->fdir.txq;
-	struct i40e_rx_queue *rxq = pf->fdir.rxq;
+	struct ci_rx_queue *rxq = pf->fdir.rxq;
 	const struct i40e_fdir_action *fdir_action = &filter->action;
 	volatile struct i40e_tx_desc *txdp;
 	volatile struct i40e_filter_program_desc *fdirdp;
diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
index 2875c578af..aa7703216d 100644
--- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
@@ -13,15 +13,15 @@
 void
 i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
-	struct i40e_rx_entry *rxep;
+	struct ci_rx_queue *rxq = rx_queue;
+	struct ci_rx_entry *rxep;
 	volatile union i40e_rx_desc *rxdp;
 	uint16_t rx_id;
 	uint64_t paddr;
 	uint64_t dma_addr;
 	uint16_t i;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 	rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
 	for (i = 0; i < nb_mbufs; i++) {
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index c3ff2e05c3..96490296ba 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -94,12 +94,12 @@ i40e_monitor_callback(const uint64_t value,
 int
 i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile union i40e_rx_desc *rxdp;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = I40E_RX_RING_PTR(rxq, desc);
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.qword1.status_error_len;
 
@@ -416,9 +416,9 @@ i40e_xmit_cleanup(struct ci_tx_queue *txq)
 
 static inline int
 #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
-check_rx_burst_bulk_alloc_preconditions(struct i40e_rx_queue *rxq)
+check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
 #else
-check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq)
+check_rx_burst_bulk_alloc_preconditions(__rte_unused struct ci_rx_queue *rxq)
 #endif
 {
 	int ret = 0;
@@ -456,10 +456,10 @@ check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq)
 #error "PMD I40E: I40E_LOOK_AHEAD must be 8\n"
 #endif
 static inline int
-i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
+i40e_rx_scan_hw_ring(struct ci_rx_queue *rxq)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t pkt_len;
 	uint64_t qword1;
@@ -467,9 +467,9 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
 	int32_t s[I40E_LOOK_AHEAD], var, nb_dd;
 	int32_t i, j, nb_rx = 0;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
@@ -558,7 +558,7 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
 }
 
 static inline uint16_t
-i40e_rx_fill_from_stage(struct i40e_rx_queue *rxq,
+i40e_rx_fill_from_stage(struct ci_rx_queue *rxq,
 			struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
@@ -577,10 +577,10 @@ i40e_rx_fill_from_stage(struct i40e_rx_queue *rxq,
 }
 
 static inline int
-i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
+i40e_rx_alloc_bufs(struct ci_rx_queue *rxq)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx, i;
 	uint64_t dma_addr;
@@ -597,7 +597,7 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
 		return -ENOMEM;
 	}
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = I40E_RX_RING_PTR(rxq, alloc_idx);
 	for (i = 0; i < rxq->rx_free_thresh; i++) {
 		if (likely(i < (rxq->rx_free_thresh - 1)))
 			/* Prefetch next mbuf */
@@ -629,7 +629,7 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
 static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = (struct i40e_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	struct rte_eth_dev *dev;
 	uint16_t nb_rx = 0;
 
@@ -648,7 +648,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		if (i40e_rx_alloc_bufs(rxq) != 0) {
 			uint16_t i, j;
 
-			dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+			dev = I40E_VSI_TO_ETH_DEV(rxq->i40e_vsi);
 			dev->data->rx_mbuf_alloc_failed +=
 				rxq->rx_free_thresh;
 
@@ -707,12 +707,12 @@ i40e_recv_pkts_bulk_alloc(void __rte_unused *rx_queue,
 uint16_t
 i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	volatile union i40e_rx_desc *rx_ring;
 	volatile union i40e_rx_desc *rxdp;
 	union i40e_rx_desc rxd;
-	struct i40e_rx_entry *sw_ring;
-	struct i40e_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_eth_dev *dev;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
@@ -729,9 +729,9 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
+	rx_ring = I40E_RX_RING(rxq);
 	sw_ring = rxq->sw_ring;
-	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -745,7 +745,7 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+			dev = I40E_VSI_TO_ETH_DEV(rxq->i40e_vsi);
 			dev->data->rx_mbuf_alloc_failed++;
 			break;
 		}
@@ -837,12 +837,12 @@ i40e_recv_scattered_pkts(void *rx_queue,
 			 struct rte_mbuf **rx_pkts,
 			 uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
-	volatile union i40e_rx_desc *rx_ring = rxq->rx_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union i40e_rx_desc *rx_ring = I40E_RX_RING(rxq);
 	volatile union i40e_rx_desc *rxdp;
 	union i40e_rx_desc rxd;
-	struct i40e_rx_entry *sw_ring = rxq->sw_ring;
-	struct i40e_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb, *rxm;
@@ -853,7 +853,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
 	uint64_t qword1;
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -867,7 +867,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
 
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+			dev = I40E_VSI_TO_ETH_DEV(rxq->i40e_vsi);
 			dev->data->rx_mbuf_alloc_failed++;
 			break;
 		}
@@ -1798,7 +1798,7 @@ i40e_get_queue_offset_by_qindex(struct i40e_pf *pf, uint16_t queue_idx)
 int
 i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -1841,7 +1841,7 @@ i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 int
 i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -2004,7 +2004,7 @@ i40e_dev_first_queue(uint16_t idx, void **queues, int num)
 
 static int
 i40e_dev_rx_queue_setup_runtime(struct rte_eth_dev *dev,
-				struct i40e_rx_queue *rxq)
+				struct ci_rx_queue *rxq)
 {
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
@@ -2081,7 +2081,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct i40e_vsi *vsi;
 	struct i40e_pf *pf = NULL;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz;
 	uint32_t ring_size;
 	uint16_t len, i;
@@ -2116,7 +2116,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate the rx queue data structure */
 	rxq = rte_zmalloc_socket("i40e rx queue",
-				 sizeof(struct i40e_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 socket_id);
 	if (!rxq) {
@@ -2135,7 +2135,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	else
 		rxq->crc_len = 0;
 	rxq->drop_en = rx_conf->rx_drop_en;
-	rxq->vsi = vsi;
+	rxq->i40e_vsi = vsi;
 	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 	rxq->offloads = offloads;
 
@@ -2164,14 +2164,14 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	memset(rz->addr, 0, ring_size);
 
 	rxq->rx_ring_phys_addr = rz->iova;
-	rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
+	I40E_RX_RING(rxq) = (union i40e_rx_desc *)rz->addr;
 
 	len = (uint16_t)(nb_desc + RTE_PMD_I40E_RX_MAX_BURST);
 
 	/* Allocate the software ring. */
 	rxq->sw_ring =
 		rte_zmalloc_socket("i40e rx sw ring",
-				   sizeof(struct i40e_rx_entry) * len,
+				   sizeof(struct ci_rx_entry) * len,
 				   RTE_CACHE_LINE_SIZE,
 				   socket_id);
 	if (!rxq->sw_ring) {
@@ -2242,7 +2242,7 @@ i40e_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 void
 i40e_rx_queue_release(void *rxq)
 {
-	struct i40e_rx_queue *q = (struct i40e_rx_queue *)rxq;
+	struct ci_rx_queue *q = (struct ci_rx_queue *)rxq;
 
 	if (!q) {
 		PMD_DRV_LOG(DEBUG, "Pointer to rxq is NULL");
@@ -2260,11 +2260,11 @@ i40e_dev_rx_queue_count(void *rx_queue)
 {
 #define I40E_RXQ_SCAN_INTERVAL 4
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &(rxq->rx_ring[rxq->rx_tail]);
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	while ((desc < rxq->nb_rx_desc) &&
 		((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
 		I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT) &
@@ -2277,8 +2277,8 @@ i40e_dev_rx_queue_count(void *rx_queue)
 		desc += I40E_RXQ_SCAN_INTERVAL;
 		rxdp += I40E_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
-					desc - rxq->nb_rx_desc]);
+			rxdp = I40E_RX_RING_PTR(rxq,
+					rxq->rx_tail + desc - rxq->nb_rx_desc);
 	}
 
 	return desc;
@@ -2287,7 +2287,7 @@ i40e_dev_rx_queue_count(void *rx_queue)
 int
 i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile uint64_t *status;
 	uint64_t mask;
 	uint32_t desc;
@@ -2302,7 +2302,7 @@ i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	status = &rxq->rx_ring[desc].wb.qword1.status_error_len;
+	status = &I40E_RX_RING_PTR(rxq, desc)->wb.qword1.status_error_len;
 	mask = rte_le_to_cpu_64((1ULL << I40E_RX_DESC_STATUS_DD_SHIFT)
 		<< I40E_RXD_QW1_STATUS_SHIFT);
 	if (*status & mask)
@@ -2628,12 +2628,12 @@ i40e_memzone_reserve(const char *name, uint32_t len, int socket_id)
 }
 
 void
-i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs(struct ci_rx_queue *rxq)
 {
 	uint16_t i;
 
 	/* SSE Vector driver has a different way of releasing mbufs. */
-	if (rxq->rx_using_sse) {
+	if (rxq->vector_rx) {
 		i40e_rx_queue_release_mbufs_vec(rxq);
 		return;
 	}
@@ -2663,7 +2663,7 @@ i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
 }
 
 void
-i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
+i40e_reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	unsigned i;
 	uint16_t len;
@@ -2681,7 +2681,7 @@ i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
 		len = rxq->nb_rx_desc;
 
 	for (i = 0; i < len * sizeof(union i40e_rx_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)I40E_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 	for (i = 0; i < RTE_PMD_I40E_RX_MAX_BURST; ++i)
@@ -2898,9 +2898,9 @@ i40e_tx_queue_init(struct ci_tx_queue *txq)
 }
 
 int
-i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
+i40e_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq)
 {
-	struct i40e_rx_entry *rxe = rxq->sw_ring;
+	struct ci_rx_entry *rxe = rxq->sw_ring;
 	uint64_t dma_addr;
 	uint16_t i;
 
@@ -2922,7 +2922,7 @@ i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
-		rxd = &rxq->rx_ring[i];
+		rxd = I40E_RX_RING_PTR(rxq, i);
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
@@ -2941,10 +2941,10 @@ i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
  * and maximum packet length.
  */
 static int
-i40e_rx_queue_config(struct i40e_rx_queue *rxq)
+i40e_rx_queue_config(struct ci_rx_queue *rxq)
 {
-	struct i40e_pf *pf = I40E_VSI_TO_PF(rxq->vsi);
-	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
+	struct i40e_pf *pf = I40E_VSI_TO_PF(rxq->i40e_vsi);
+	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->i40e_vsi);
 	struct rte_eth_dev_data *data = pf->dev_data;
 	uint16_t buf_size;
 
@@ -2988,11 +2988,11 @@ i40e_rx_queue_config(struct i40e_rx_queue *rxq)
 
 /* Init the RX queue in hardware */
 int
-i40e_rx_queue_init(struct i40e_rx_queue *rxq)
+i40e_rx_queue_init(struct ci_rx_queue *rxq)
 {
 	int err = I40E_SUCCESS;
-	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
-	struct rte_eth_dev_data *dev_data = I40E_VSI_TO_DEV_DATA(rxq->vsi);
+	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->i40e_vsi);
+	struct rte_eth_dev_data *dev_data = I40E_VSI_TO_DEV_DATA(rxq->i40e_vsi);
 	uint16_t pf_q = rxq->reg_idx;
 	uint16_t buf_size;
 	struct i40e_hmc_obj_rxq rx_ctx;
@@ -3166,7 +3166,7 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
 enum i40e_status_code
 i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz = NULL;
 	uint32_t ring_size;
 	struct rte_eth_dev *dev;
@@ -3180,7 +3180,7 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 
 	/* Allocate the RX queue data structure. */
 	rxq = rte_zmalloc_socket("i40e fdir rx queue",
-				  sizeof(struct i40e_rx_queue),
+				  sizeof(struct ci_rx_queue),
 				  RTE_CACHE_LINE_SIZE,
 				  SOCKET_ID_ANY);
 	if (!rxq) {
@@ -3206,11 +3206,11 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 	rxq->nb_rx_desc = I40E_FDIR_NUM_RX_DESC;
 	rxq->queue_id = I40E_FDIR_QUEUE_ID;
 	rxq->reg_idx = pf->fdir.fdir_vsi->base_queue;
-	rxq->vsi = pf->fdir.fdir_vsi;
+	rxq->i40e_vsi = pf->fdir.fdir_vsi;
 
 	rxq->rx_ring_phys_addr = rz->iova;
 	memset(rz->addr, 0, I40E_FDIR_NUM_RX_DESC * sizeof(union i40e_rx_desc));
-	rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
+	I40E_RX_RING(rxq) = (union i40e_rx_desc *)rz->addr;
 
 	/*
 	 * Don't need to allocate software ring and reset for the fdir
@@ -3226,7 +3226,7 @@ void
 i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -3264,7 +3264,7 @@ void
 i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 
@@ -3335,7 +3335,7 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 		}
 		if (ad->rx_vec_allowed) {
 			for (i = 0; i < dev->data->nb_rx_queues; i++) {
-				struct i40e_rx_queue *rxq =
+				struct ci_rx_queue *rxq =
 					dev->data->rx_queues[i];
 
 				if (rxq && i40e_rxq_vec_setup(rxq)) {
@@ -3438,10 +3438,10 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 			 dev->rx_pkt_burst == i40e_recv_pkts_vec_avx2);
 
 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
-			struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+			struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 
 			if (rxq)
-				rxq->rx_using_sse = rx_using_sse;
+				rxq->vector_rx = rx_using_sse;
 		}
 	}
 }
diff --git a/drivers/net/intel/i40e/i40e_rxtx.h b/drivers/net/intel/i40e/i40e_rxtx.h
index 2f32fc5686..4b5a84d8ef 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx.h
@@ -6,8 +6,9 @@
 #define _I40E_RXTX_H_
 
 #include "../common/tx.h"
+#include "../common/rx.h"
 
-#define RTE_PMD_I40E_RX_MAX_BURST 32
+#define RTE_PMD_I40E_RX_MAX_BURST CI_RX_MAX_BURST
 #define RTE_PMD_I40E_TX_MAX_BURST 32
 
 #define RTE_I40E_VPMD_RX_BURST        32
@@ -67,62 +68,19 @@ enum i40e_header_split_mode {
 			       I40E_HEADER_SPLIT_UDP_TCP | \
 			       I40E_HEADER_SPLIT_SCTP)
 
-/* HW desc structure, both 16-byte and 32-byte types are supported */
+/* HW desc structures, both 16-byte and 32-byte types are supported */
 #ifdef RTE_LIBRTE_I40E_16BYTE_RX_DESC
 #define i40e_rx_desc i40e_16byte_rx_desc
+#define I40E_RX_RING(rxq) \
+	((rxq)->i40e_rx_16b_ring)
 #else
 #define i40e_rx_desc i40e_32byte_rx_desc
+#define I40E_RX_RING(rxq) \
+	((rxq)->i40e_rx_32b_ring)
 #endif
 
-struct i40e_rx_entry {
-	struct rte_mbuf *mbuf;
-};
-
-/*
- * Structure associated with each RX queue.
- */
-struct i40e_rx_queue {
-	struct rte_mempool *mp; /**< mbuf pool to populate RX ring */
-	volatile union i40e_rx_desc *rx_ring;/**< RX ring virtual address */
-	uint64_t rx_ring_phys_addr; /**< RX ring DMA address */
-	struct i40e_rx_entry *sw_ring; /**< address of RX soft ring */
-	uint16_t nb_rx_desc; /**< number of RX descriptors */
-	uint16_t rx_free_thresh; /**< max free RX desc to hold */
-	uint16_t rx_tail; /**< current value of tail */
-	uint16_t nb_rx_hold; /**< number of held free RX desc */
-	struct rte_mbuf *pkt_first_seg; /**< first segment of current packet */
-	struct rte_mbuf *pkt_last_seg; /**< last segment of current packet */
-	struct rte_mbuf fake_mbuf; /**< dummy mbuf */
-#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
-	uint16_t rx_nb_avail; /**< number of staged packets ready */
-	uint16_t rx_next_avail; /**< index of next staged packets */
-	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	struct rte_mbuf *rx_stage[RTE_PMD_I40E_RX_MAX_BURST * 2];
-#endif
-
-	uint16_t rxrearm_nb;	/**< number of remaining to be re-armed */
-	uint16_t rxrearm_start;	/**< the idx we start the re-arming from */
-	uint64_t mbuf_initializer; /**< value to init mbufs */
-
-	uint16_t port_id; /**< device port ID */
-	uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise */
-	uint8_t fdir_enabled; /**< 0 if FDIR disabled, 1 when enabled */
-	uint16_t queue_id; /**< RX queue index */
-	uint16_t reg_idx; /**< RX queue register index */
-	uint8_t drop_en; /**< if not 0, set register bit */
-	volatile uint8_t *qrx_tail; /**< register address of tail */
-	struct i40e_vsi *vsi; /**< the VSI this queue belongs to */
-	uint16_t rx_buf_len; /* The packet buffer size */
-	uint16_t rx_hdr_len; /* The header buffer size */
-	uint16_t max_pkt_len; /* Maximum packet length */
-	uint8_t hs_mode; /* Header Split mode */
-	bool q_set; /**< indicate if rx queue has been configured */
-	bool rx_deferred_start; /**< don't start this queue in dev start */
-	uint16_t rx_using_sse; /**<flag indicate the usage of vPMD for rx */
-	uint8_t dcb_tc;         /**< Traffic class of rx queue */
-	uint64_t offloads; /**< Rx offload flags of RTE_ETH_RX_OFFLOAD_* */
-	const struct rte_memzone *mz;
-};
+#define I40E_RX_RING_PTR(rxq, entry) \
+	(I40E_RX_RING(rxq) + (entry))
 
 /** Offload features */
 union i40e_tx_offload {
@@ -172,16 +130,16 @@ uint16_t i40e_simple_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t i40e_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
 int i40e_tx_queue_init(struct ci_tx_queue *txq);
-int i40e_rx_queue_init(struct i40e_rx_queue *rxq);
+int i40e_rx_queue_init(struct ci_rx_queue *rxq);
 void i40e_free_tx_resources(struct ci_tx_queue *txq);
-void i40e_free_rx_resources(struct i40e_rx_queue *rxq);
+void i40e_free_rx_resources(struct ci_rx_queue *rxq);
 void i40e_dev_clear_queues(struct rte_eth_dev *dev);
 void i40e_dev_free_queues(struct rte_eth_dev *dev);
-void i40e_reset_rx_queue(struct i40e_rx_queue *rxq);
+void i40e_reset_rx_queue(struct ci_rx_queue *rxq);
 void i40e_reset_tx_queue(struct ci_tx_queue *txq);
 int i40e_tx_done_cleanup(void *txq, uint32_t free_cnt);
-int i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq);
-void i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq);
+int i40e_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq);
+void i40e_rx_queue_release_mbufs(struct ci_rx_queue *rxq);
 
 uint32_t i40e_dev_rx_queue_count(void *rx_queue);
 int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset);
@@ -197,9 +155,9 @@ uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue,
 				      struct rte_mbuf **rx_pkts,
 				      uint16_t nb_pkts);
 int i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);
-int i40e_rxq_vec_setup(struct i40e_rx_queue *rxq);
+int i40e_rxq_vec_setup(struct ci_rx_queue *rxq);
 int i40e_txq_vec_setup(struct ci_tx_queue *txq);
-void i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq);
+void i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq);
 uint16_t i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				   uint16_t nb_pkts);
 void i40e_set_rx_function(struct rte_eth_dev *dev);
diff --git a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
index b66a808f9f..fd9447014b 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
@@ -13,14 +13,14 @@
 
 #ifdef __AVX2__
 static __rte_always_inline void
-i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
+i40e_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
index 42beff6e89..3e4109e82e 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
@@ -16,13 +16,13 @@
 #include <rte_altivec.h>
 
 static inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
 
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 
 	__vector unsigned long hdr_room = (__vector unsigned long){
@@ -30,7 +30,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 						RTE_PKTMBUF_HEADROOM};
 	__vector unsigned long dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING(rxq) + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -195,16 +195,16 @@ desc_to_ptype_v(__vector unsigned long descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
 	__vector unsigned char shuf_msk;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	__vector unsigned short crc_adjust = (__vector unsigned short){
 		0, 0,         /* ignore pkt_type field */
@@ -221,7 +221,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -465,7 +465,7 @@ static uint16_t
 i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -611,15 +611,15 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+i40e_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
-	rxq->rx_using_sse = 1;
+	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
 }
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index 9c406e7a6f..0f3f7430aa 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -16,7 +16,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	i40e_rxq_rearm_common(rxq, false);
 }
@@ -105,16 +105,16 @@ desc_fdir_processing_32b(volatile union i40e_rx_desc *rxdp,
 
 /* Force inline as some compilers will not inline by default. */
 static __rte_always_inline uint16_t
-_recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec_avx2(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts, uint8_t *split_packet)
 {
 #define RTE_I40E_DESCS_PER_LOOP_AVX 8
 
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct i40e_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union i40e_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union i40e_rx_desc *rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 	rte_prefetch0(rxdp);
 
@@ -625,7 +625,7 @@ static uint16_t
 i40e_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index d8244556c0..f2292b45e8 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -18,7 +18,7 @@
 #define RTE_I40E_DESCS_PER_LOOP_AVX 8
 
 static __rte_always_inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	i40e_rxq_rearm_common(rxq, true);
 }
@@ -108,14 +108,14 @@ desc_fdir_processing_32b(volatile union i40e_rx_desc *rxdp,
 
 /* Force inline as some compilers will not inline by default. */
 static __rte_always_inline uint16_t
-_recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec_avx512(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			  uint16_t nb_pkts, uint8_t *split_packet)
 {
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct i40e_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union i40e_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union i40e_rx_desc *rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -693,7 +693,7 @@ i40e_recv_scattered_burst_vec_avx512(void *rx_queue,
 				     struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_common.h b/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
index ba72df8e13..d19b9e4bf4 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
@@ -21,7 +21,7 @@ i40e_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
 }
 
 static inline void
-_i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+_i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	const unsigned mask = rxq->nb_rx_desc - 1;
 	unsigned i;
@@ -68,7 +68,7 @@ i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
 	 */
 	ad->rx_vec_allowed = true;
 	for (uint16_t i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		if (!rxq)
 			continue;
 		if (!ci_rxq_vec_capable(rxq->nb_rx_desc, rxq->rx_free_thresh, rxq->offloads)) {
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index d16ceb6b5d..814aa666dc 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -17,18 +17,18 @@
 #include "i40e_rxtx_vec_common.h"
 
 static inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	uint64x2_t dma_addr0, dma_addr1;
 	uint64x2_t zero = vdupq_n_u64(0);
 	uint64_t paddr;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (unlikely(rte_mempool_get_bulk(rxq->mp,
@@ -203,7 +203,7 @@ descs_to_fdir_16b(uint32x4_t fltstat, uint64x2_t descs[4], struct rte_mbuf **rx_
 #endif
 
 static inline void
-desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
+desc_to_olflags_v(struct ci_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
 		  uint64x2_t descs[4], struct rte_mbuf **rx_pkts)
 {
 	uint32x4_t vlan0, vlan1, rss, l3_l4e;
@@ -332,15 +332,15 @@ desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **__rte_restrict rx_pkts,
  * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
+_recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		   struct rte_mbuf **__rte_restrict rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	/* mask to shuffle from desc. to mbuf */
 	uint8x16_t shuf_msk = {
@@ -374,7 +374,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch_non_temporal(rxdp);
 
@@ -592,7 +592,7 @@ i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
 
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -738,15 +738,15 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
 }
 
 void __rte_cold
-i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+i40e_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
-	rxq->rx_using_sse = 1;
+	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
 }
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 774519265b..74cd59e245 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -15,18 +15,18 @@
 #include <rte_vect.h>
 
 static inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
 	__m128i dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -207,7 +207,7 @@ descs_to_fdir_16b(__m128i fltstat, __m128i descs[4], struct rte_mbuf **rx_pkt)
 #endif
 
 static inline void
-desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
+desc_to_olflags_v(struct ci_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
 		  __m128i descs[4], struct rte_mbuf **rx_pkts)
 {
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -347,16 +347,16 @@ desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
 	__m128i shuf_msk;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	__m128i crc_adjust = _mm_set_epi16(
 				0, 0, 0,    /* ignore non-length fields */
@@ -382,7 +382,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -609,7 +609,7 @@ i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
 
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -755,15 +755,15 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+i40e_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
-	rxq->rx_using_sse = 1;
+	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
 }
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v3 05/13] net/ice: use the common Rx queue structure
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (2 preceding siblings ...)
  2025-05-12 12:54   ` [PATCH v3 04/13] net/i40e: use the " Anatoly Burakov
@ 2025-05-12 12:54   ` Anatoly Burakov
  2025-05-14 16:56     ` Bruce Richardson
  2025-05-12 12:54   ` [PATCH v3 06/13] net/iavf: " Anatoly Burakov
                     ` (9 subsequent siblings)
  13 siblings, 1 reply; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 12:54 UTC (permalink / raw)
  To: dev, Bruce Richardson

Make the ice driver use the new common Rx queue structure.

Because the ice driver supports both 16-byte and 32-byte descriptor
formats (controlled by the RTE_LIBRTE_ICE_16BYTE_RX_DESC define), the
common queue structure has to take that into account, so the ring union
holds both descriptor types and the actual format is selected by ice at
compile time using the above macro. Direct accesses to the Rx ring are
now replaced with macro accessors that take the descriptor size into
account.
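
By analogy with the i40e patch earlier in this series, the common ring
union gains the ice_rx_16b_ring/ice_rx_32b_ring members (see the
common/rx.h hunk below) and ring accesses go through a compile-time
selected macro. A minimal sketch of the idea (the accessor macro name is
assumed here for illustration, not quoted from this diff):

    #ifdef RTE_LIBRTE_ICE_16BYTE_RX_DESC
    #define ICE_RX_RING(rxq)        ((rxq)->ice_rx_16b_ring)
    #else
    #define ICE_RX_RING(rxq)        ((rxq)->ice_rx_32b_ring)
    #endif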

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v2:
    - Make xtr_field_offs of type ptrdiff_t instead of off_t to fix 32-bit compile
      issues

 drivers/net/intel/common/rx.h               |  23 +++
 drivers/net/intel/ice/ice_dcf.c             |   6 +-
 drivers/net/intel/ice/ice_dcf_ethdev.c      |  22 +--
 drivers/net/intel/ice/ice_ethdev.c          |   2 +-
 drivers/net/intel/ice/ice_ethdev.h          |   5 +-
 drivers/net/intel/ice/ice_rxtx.c            | 158 ++++++++++----------
 drivers/net/intel/ice/ice_rxtx.h            |  78 ++--------
 drivers/net/intel/ice/ice_rxtx_common_avx.h |   6 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx2.c   |  14 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx512.c |  16 +-
 drivers/net/intel/ice/ice_rxtx_vec_common.h |   6 +-
 drivers/net/intel/ice/ice_rxtx_vec_sse.c    |  22 +--
 12 files changed, 164 insertions(+), 194 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index db49db57d0..9a691971bc 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -5,6 +5,7 @@
 #ifndef _COMMON_INTEL_RX_H_
 #define _COMMON_INTEL_RX_H_
 
+#include <stddef.h>
 #include <stdint.h>
 #include <unistd.h>
 #include <rte_mbuf.h>
@@ -12,6 +13,7 @@
 
 #define CI_RX_BURST 32
 #define CI_RX_MAX_BURST 32
+#define CI_RX_MAX_NSEG 2
 
 struct ci_rx_queue;
 
@@ -23,6 +25,8 @@ struct ci_rx_entry_sc {
 	struct rte_mbuf *fbuf; /* First segment of the fragmented packet.*/
 };
 
+typedef void (*ci_rx_release_mbufs_t)(struct ci_rx_queue *rxq);
+
 /**
  * Structure associated with each RX queue.
  */
@@ -32,6 +36,8 @@ struct ci_rx_queue {
 		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
 		volatile union i40e_16byte_rx_desc *i40e_rx_16b_ring;
 		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
+		volatile union ice_16b_rx_flex_desc *ice_rx_16b_ring;
+		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
@@ -64,10 +70,16 @@ struct ci_rx_queue {
 	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
 	uint64_t mbuf_initializer; /**< value to init mbufs */
 	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
+	uint32_t rxdid; /**< RX descriptor format ID. */
+	uint32_t proto_xtr; /* protocol extraction type */
+	uint64_t xtr_ol_flag; /* flexible descriptor metadata extraction offload flag */
+	ptrdiff_t xtr_field_offs; /* Protocol extraction metadata offset */
+	uint64_t hw_time_update; /**< Last time HW timestamp was updated */
 	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
 	struct rte_mbuf fake_mbuf;
 	union { /* the VSI this queue belongs to */
 		struct i40e_vsi *i40e_vsi;
+		struct ice_vsi *ice_vsi;
 	};
 	const struct rte_memzone *mz;
 	union {
@@ -85,6 +97,17 @@ struct ci_rx_queue {
 			uint8_t hs_mode; /**< Header Split mode */
 			uint8_t dcb_tc; /**< Traffic class of rx queue */
 		};
+		struct { /* ice specific values */
+			ci_rx_release_mbufs_t rx_rel_mbufs; /**< release mbuf function */
+			/** holds buffer split information */
+			struct rte_eth_rxseg_split rxseg[CI_RX_MAX_NSEG];
+			struct ci_rx_entry *sw_split_buf; /**< Buffer split SW ring */
+			uint32_t rxseg_nb; /**< number of buffer split segments */
+			uint32_t time_high; /* high 32 bits of hardware timestamp register */
+			uint32_t hw_time_high; /* high 32 bits of timestamp */
+			uint32_t hw_time_low; /* low 32 bits of timestamp */
+			bool ts_enable; /* if rxq timestamp is enabled */
+		};
 	};
 };
 
diff --git a/drivers/net/intel/ice/ice_dcf.c b/drivers/net/intel/ice/ice_dcf.c
index 65c18921f4..fddf5bbde5 100644
--- a/drivers/net/intel/ice/ice_dcf.c
+++ b/drivers/net/intel/ice/ice_dcf.c
@@ -1175,8 +1175,8 @@ ice_dcf_init_rss(struct ice_dcf_hw *hw)
 int
 ice_dcf_configure_queues(struct ice_dcf_hw *hw)
 {
-	struct ice_rx_queue **rxq =
-		(struct ice_rx_queue **)hw->eth_dev->data->rx_queues;
+	struct ci_rx_queue **rxq =
+		(struct ci_rx_queue **)hw->eth_dev->data->rx_queues;
 	struct ci_tx_queue **txq =
 		(struct ci_tx_queue **)hw->eth_dev->data->tx_queues;
 	struct virtchnl_vsi_queue_config_info *vc_config;
@@ -1211,7 +1211,7 @@ ice_dcf_configure_queues(struct ice_dcf_hw *hw)
 
 		vc_qp->rxq.max_pkt_size = rxq[i]->max_pkt_len;
 		vc_qp->rxq.ring_len = rxq[i]->nb_rx_desc;
-		vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_dma;
+		vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_phys_addr;
 		vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len;
 
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
diff --git a/drivers/net/intel/ice/ice_dcf_ethdev.c b/drivers/net/intel/ice/ice_dcf_ethdev.c
index efff76afa8..53272ddd60 100644
--- a/drivers/net/intel/ice/ice_dcf_ethdev.c
+++ b/drivers/net/intel/ice/ice_dcf_ethdev.c
@@ -106,7 +106,7 @@ ice_dcf_xmit_pkts(__rte_unused void *tx_queue,
 }
 
 static int
-ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ice_rx_queue *rxq)
+ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ci_rx_queue *rxq)
 {
 	struct ice_dcf_adapter *dcf_ad = dev->data->dev_private;
 	struct rte_eth_dev_data *dev_data = dev->data;
@@ -145,8 +145,8 @@ ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ice_rx_queue *rxq)
 static int
 ice_dcf_init_rx_queues(struct rte_eth_dev *dev)
 {
-	struct ice_rx_queue **rxq =
-		(struct ice_rx_queue **)dev->data->rx_queues;
+	struct ci_rx_queue **rxq =
+		(struct ci_rx_queue **)dev->data->rx_queues;
 	int i, ret;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -282,7 +282,7 @@ ice_dcf_config_rx_queues_irqs(struct rte_eth_dev *dev,
 }
 
 static int
-alloc_rxq_mbufs(struct ice_rx_queue *rxq)
+alloc_rxq_mbufs(struct ci_rx_queue *rxq)
 {
 	volatile union ice_rx_flex_desc *rxd;
 	struct rte_mbuf *mbuf = NULL;
@@ -305,7 +305,7 @@ alloc_rxq_mbufs(struct ice_rx_queue *rxq)
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
-		rxd = &rxq->rx_ring[i];
+		rxd = ICE_RX_RING_PTR(rxq, i);
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
@@ -324,7 +324,7 @@ ice_dcf_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ice_dcf_adapter *ad = dev->data->dev_private;
 	struct iavf_hw *hw = &ad->real_hw.avf;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err = 0;
 
 	if (rx_queue_id >= dev->data->nb_rx_queues)
@@ -358,7 +358,7 @@ ice_dcf_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 }
 
 static inline void
-reset_rx_queue(struct ice_rx_queue *rxq)
+reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	uint16_t len;
 	uint32_t i;
@@ -369,7 +369,7 @@ reset_rx_queue(struct ice_rx_queue *rxq)
 	len = rxq->nb_rx_desc + ICE_RX_MAX_BURST;
 
 	for (i = 0; i < len * sizeof(union ice_rx_flex_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)ICE_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 
@@ -429,7 +429,7 @@ ice_dcf_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ice_dcf_adapter *ad = dev->data->dev_private;
 	struct ice_dcf_hw *hw = &ad->real_hw;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 
 	if (rx_queue_id >= dev->data->nb_rx_queues)
@@ -511,7 +511,7 @@ ice_dcf_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 static int
 ice_dcf_start_queues(struct rte_eth_dev *dev)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int nb_rxq = 0;
 	int nb_txq, i;
@@ -638,7 +638,7 @@ ice_dcf_stop_queues(struct rte_eth_dev *dev)
 {
 	struct ice_dcf_adapter *ad = dev->data->dev_private;
 	struct ice_dcf_hw *hw = &ad->real_hw;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int ret, i;
 
diff --git a/drivers/net/intel/ice/ice_ethdev.c b/drivers/net/intel/ice/ice_ethdev.c
index 2e163d706c..65cf586502 100644
--- a/drivers/net/intel/ice/ice_ethdev.c
+++ b/drivers/net/intel/ice/ice_ethdev.c
@@ -6690,7 +6690,7 @@ ice_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct ice_adapter *ad =
 			ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t ts_high;
 	uint64_t ts_ns;
 
diff --git a/drivers/net/intel/ice/ice_ethdev.h b/drivers/net/intel/ice/ice_ethdev.h
index afe8dae497..0ed223d83e 100644
--- a/drivers/net/intel/ice/ice_ethdev.h
+++ b/drivers/net/intel/ice/ice_ethdev.h
@@ -257,9 +257,6 @@ struct ice_vsi_list {
 	struct ice_vsi *vsi;
 };
 
-struct ice_rx_queue;
-struct ci_tx_queue;
-
 /**
  * Structure that defines a VSI, associated with a adapter.
  */
@@ -409,7 +406,7 @@ struct ice_fdir_counter_pool_container {
 struct ice_fdir_info {
 	struct ice_vsi *fdir_vsi;     /* pointer to fdir VSI structure */
 	struct ci_tx_queue *txq;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	void *prg_pkt;                 /* memory for fdir program packet */
 	uint64_t dma_addr;             /* physic address of packet memory*/
 	const struct rte_memzone *mz;
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 40ac01e782..4749ee729f 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -37,11 +37,11 @@ int
 ice_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = ICE_RX_RING_PTR(rxq, desc);
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.status_error0;
 
@@ -73,7 +73,7 @@ ice_proto_xtr_type_to_rxdid(uint8_t xtr_type)
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ci_rx_queue *rxq,
 				       struct rte_mbuf *mb,
 				       volatile union ice_rx_flex_desc *rxdp)
 {
@@ -95,7 +95,7 @@ ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ice_rx_queue *rxq,
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ci_rx_queue *rxq,
 				   struct rte_mbuf *mb,
 				   volatile union ice_rx_flex_desc *rxdp)
 {
@@ -120,7 +120,7 @@ ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ice_rx_queue *rxq,
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ci_rx_queue *rxq,
 				      struct rte_mbuf *mb,
 				      volatile union ice_rx_flex_desc *rxdp)
 {
@@ -164,7 +164,7 @@ ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ice_rx_queue *rxq,
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_aux_v2(struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_aux_v2(struct ci_rx_queue *rxq,
 				      struct rte_mbuf *mb,
 				      volatile union ice_rx_flex_desc *rxdp)
 {
@@ -215,7 +215,7 @@ static const ice_rxd_to_pkt_fields_t rxd_to_pkt_fields_ops[] = {
 };
 
 void
-ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq, uint32_t rxdid)
+ice_select_rxd_to_pkt_fields_handler(struct ci_rx_queue *rxq, uint32_t rxdid)
 {
 	rxq->rxdid = rxdid;
 
@@ -243,17 +243,17 @@ ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq, uint32_t rxdid)
 }
 
 static int
-ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
+ice_program_hw_rx_queue(struct ci_rx_queue *rxq)
 {
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
 	struct ice_pf *pf = ICE_VSI_TO_PF(vsi);
-	struct rte_eth_dev_data *dev_data = rxq->vsi->adapter->pf.dev_data;
+	struct rte_eth_dev_data *dev_data = rxq->ice_vsi->adapter->pf.dev_data;
 	struct ice_rlan_ctx rx_ctx;
 	uint16_t buf_size;
 	uint32_t rxdid = ICE_RXDID_COMMS_OVS;
 	uint32_t regval;
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 	uint32_t frame_size = dev_data->mtu + ICE_ETH_OVERHEAD;
 	int err;
 
@@ -371,7 +371,7 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
 		rx_ctx.dtype = 0; /* No Protocol Based Buffer Split mode */
 	}
 
-	rx_ctx.base = rxq->rx_ring_dma / ICE_QUEUE_BASE_ADDR_UNIT;
+	rx_ctx.base = rxq->rx_ring_phys_addr / ICE_QUEUE_BASE_ADDR_UNIT;
 	rx_ctx.qlen = rxq->nb_rx_desc;
 	rx_ctx.dbuf = rxq->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
 	rx_ctx.hbuf = rxq->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S;
@@ -452,15 +452,15 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
 
 /* Allocate mbufs for all descriptors in rx queue */
 static int
-ice_alloc_rx_queue_mbufs(struct ice_rx_queue *rxq)
+ice_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq)
 {
-	struct ice_rx_entry *rxe = rxq->sw_ring;
+	struct ci_rx_entry *rxe = rxq->sw_ring;
 	uint64_t dma_addr;
 	uint16_t i;
 
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
 		volatile union ice_rx_flex_desc *rxd;
-		rxd = &rxq->rx_ring[i];
+		rxd = ICE_RX_RING_PTR(rxq, i);
 		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mp);
 
 		if (unlikely(!mbuf)) {
@@ -514,7 +514,7 @@ ice_alloc_rx_queue_mbufs(struct ice_rx_queue *rxq)
 
 /* Free all mbufs for descriptors in rx queue */
 static void
-_ice_rx_queue_release_mbufs(struct ice_rx_queue *rxq)
+_ice_rx_queue_release_mbufs(struct ci_rx_queue *rxq)
 {
 	uint16_t i;
 
@@ -591,7 +591,7 @@ ice_switch_rx_queue(struct ice_hw *hw, uint16_t q_idx, bool on)
 }
 
 static inline int
-ice_check_rx_burst_bulk_alloc_preconditions(struct ice_rx_queue *rxq)
+ice_check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
 {
 	int ret = 0;
 
@@ -618,9 +618,9 @@ ice_check_rx_burst_bulk_alloc_preconditions(struct ice_rx_queue *rxq)
 	return ret;
 }
 
-/* reset fields in ice_rx_queue back to default */
+/* reset fields in ci_rx_queue back to default */
 static void
-ice_reset_rx_queue(struct ice_rx_queue *rxq)
+ice_reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	unsigned int i;
 	uint16_t len;
@@ -633,7 +633,7 @@ ice_reset_rx_queue(struct ice_rx_queue *rxq)
 	len = (uint16_t)(rxq->nb_rx_desc + ICE_RX_MAX_BURST);
 
 	for (i = 0; i < len * sizeof(union ice_rx_flex_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)ICE_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 	for (i = 0; i < ICE_RX_MAX_BURST; ++i)
@@ -655,7 +655,7 @@ ice_reset_rx_queue(struct ice_rx_queue *rxq)
 int
 ice_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -715,7 +715,7 @@ ice_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 int
 ice_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -834,9 +834,9 @@ ice_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 }
 
 static int
-ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
+ice_fdir_program_hw_rx_queue(struct ci_rx_queue *rxq)
 {
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
 	uint32_t rxdid = ICE_RXDID_LEGACY_1;
 	struct ice_rlan_ctx rx_ctx;
@@ -848,7 +848,7 @@ ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
 
 	memset(&rx_ctx, 0, sizeof(rx_ctx));
 
-	rx_ctx.base = rxq->rx_ring_dma / ICE_QUEUE_BASE_ADDR_UNIT;
+	rx_ctx.base = rxq->rx_ring_phys_addr / ICE_QUEUE_BASE_ADDR_UNIT;
 	rx_ctx.qlen = rxq->nb_rx_desc;
 	rx_ctx.dbuf = rxq->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
 	rx_ctx.hbuf = rxq->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S;
@@ -909,7 +909,7 @@ ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
 int
 ice_fdir_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
@@ -1099,7 +1099,7 @@ ice_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 int
 ice_fdir_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
@@ -1170,7 +1170,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 	struct ice_adapter *ad =
 		ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct ice_vsi *vsi = pf->main_vsi;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz;
 	uint32_t ring_size, tlen;
 	uint16_t len;
@@ -1206,7 +1206,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate the rx queue data structure */
 	rxq = rte_zmalloc_socket(NULL,
-				 sizeof(struct ice_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 socket_id);
 
@@ -1240,7 +1240,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 		rxq->crc_len = 0;
 
 	rxq->drop_en = rx_conf->rx_drop_en;
-	rxq->vsi = vsi;
+	rxq->ice_vsi = vsi;
 	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 	rxq->proto_xtr = pf->proto_xtr != NULL ?
 			 pf->proto_xtr[queue_idx] : PROTO_XTR_NONE;
@@ -1274,8 +1274,8 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 	/* Zero all the descriptors in the ring. */
 	memset(rz->addr, 0, ring_size);
 
-	rxq->rx_ring_dma = rz->iova;
-	rxq->rx_ring = rz->addr;
+	rxq->rx_ring_phys_addr = rz->iova;
+	ICE_RX_RING(rxq) = rz->addr;
 
 	/* always reserve more for bulk alloc */
 	len = (uint16_t)(nb_desc + ICE_RX_MAX_BURST);
@@ -1287,7 +1287,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate the software ring. */
 	rxq->sw_ring = rte_zmalloc_socket(NULL,
-					  sizeof(struct ice_rx_entry) * tlen,
+					  sizeof(struct ci_rx_entry) * tlen,
 					  RTE_CACHE_LINE_SIZE,
 					  socket_id);
 	if (!rxq->sw_ring) {
@@ -1324,7 +1324,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 void
 ice_rx_queue_release(void *rxq)
 {
-	struct ice_rx_queue *q = (struct ice_rx_queue *)rxq;
+	struct ci_rx_queue *q = (struct ci_rx_queue *)rxq;
 
 	if (!q) {
 		PMD_DRV_LOG(DEBUG, "Pointer to rxq is NULL");
@@ -1548,7 +1548,7 @@ void
 ice_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 		 struct rte_eth_rxq_info *qinfo)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -1586,11 +1586,11 @@ ice_rx_queue_count(void *rx_queue)
 {
 #define ICE_RXQ_SCAN_INTERVAL 4
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	while ((desc < rxq->nb_rx_desc) &&
 	       rte_le_to_cpu_16(rxdp->wb.status_error0) &
 	       (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)) {
@@ -1602,8 +1602,8 @@ ice_rx_queue_count(void *rx_queue)
 		desc += ICE_RXQ_SCAN_INTERVAL;
 		rxdp += ICE_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
-				 desc - rxq->nb_rx_desc]);
+			rxdp = ICE_RX_RING_PTR(rxq,
+					rxq->rx_tail + desc - rxq->nb_rx_desc);
 	}
 
 	return desc;
@@ -1695,25 +1695,25 @@ ice_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union ice_rx_flex_desc *rxdp)
 #define ICE_PTP_TS_VALID 0x1
 
 static inline int
-ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
+ice_rx_scan_hw_ring(struct ci_rx_queue *rxq)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t stat_err0;
 	uint16_t pkt_len, hdr_len;
 	int32_t s[ICE_LOOK_AHEAD], nb_dd;
 	int32_t i, j, nb_rx = 0;
 	uint64_t pkt_flags = 0;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 #endif
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -1843,7 +1843,7 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 }
 
 static inline uint16_t
-ice_rx_fill_from_stage(struct ice_rx_queue *rxq,
+ice_rx_fill_from_stage(struct ci_rx_queue *rxq,
 		       struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts)
 {
@@ -1862,10 +1862,10 @@ ice_rx_fill_from_stage(struct ice_rx_queue *rxq,
 }
 
 static inline int
-ice_rx_alloc_bufs(struct ice_rx_queue *rxq)
+ice_rx_alloc_bufs(struct ci_rx_queue *rxq)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx, i;
 	uint64_t dma_addr;
@@ -1894,7 +1894,7 @@ ice_rx_alloc_bufs(struct ice_rx_queue *rxq)
 		}
 	}
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = ICE_RX_RING_PTR(rxq, alloc_idx);
 	for (i = 0; i < rxq->rx_free_thresh; i++) {
 		if (likely(i < (rxq->rx_free_thresh - 1)))
 			/* Prefetch next mbuf */
@@ -1933,7 +1933,7 @@ ice_rx_alloc_bufs(struct ice_rx_queue *rxq)
 static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = (struct ice_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	uint16_t nb_rx = 0;
 
 	if (!nb_pkts)
@@ -1951,7 +1951,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		if (ice_rx_alloc_bufs(rxq) != 0) {
 			uint16_t i, j;
 
-			rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed +=
+			rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed +=
 				rxq->rx_free_thresh;
 			PMD_RX_LOG(DEBUG, "Rx mbuf alloc failed for "
 				   "port_id=%u, queue_id=%u",
@@ -2006,12 +2006,12 @@ ice_recv_scattered_pkts(void *rx_queue,
 			struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
-	volatile union ice_rx_flex_desc *rx_ring = rxq->rx_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ice_rx_flex_desc *rx_ring = ICE_RX_RING(rxq);
 	volatile union ice_rx_flex_desc *rxdp;
 	union ice_rx_flex_desc rxd;
-	struct ice_rx_entry *sw_ring = rxq->sw_ring;
-	struct ice_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb; /* new allocated mbuf */
@@ -2023,13 +2023,13 @@ ice_recv_scattered_pkts(void *rx_queue,
 	uint16_t rx_stat_err0;
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -2050,7 +2050,7 @@ ice_recv_scattered_pkts(void *rx_queue,
 		/* allocate mbuf */
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
+			rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
 			break;
 		}
 		rxd = *rxdp; /* copy descriptor in ring to temp variable*/
@@ -2319,7 +2319,7 @@ int
 ice_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint32_t desc;
 
 	if (unlikely(offset >= rxq->nb_rx_desc))
@@ -2332,7 +2332,7 @@ ice_rx_descriptor_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = ICE_RX_RING_PTR(rxq, desc);
 	if (rte_le_to_cpu_16(rxdp->wb.status_error0) &
 	    (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S))
 		return RTE_ETH_RX_DESC_DONE;
@@ -2459,7 +2459,7 @@ ice_fdir_setup_tx_resources(struct ice_pf *pf)
 int
 ice_fdir_setup_rx_resources(struct ice_pf *pf)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz = NULL;
 	uint32_t ring_size;
 	struct rte_eth_dev *dev;
@@ -2473,7 +2473,7 @@ ice_fdir_setup_rx_resources(struct ice_pf *pf)
 
 	/* Allocate the RX queue data structure. */
 	rxq = rte_zmalloc_socket("ice fdir rx queue",
-				 sizeof(struct ice_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 SOCKET_ID_ANY);
 	if (!rxq) {
@@ -2499,12 +2499,12 @@ ice_fdir_setup_rx_resources(struct ice_pf *pf)
 	rxq->nb_rx_desc = ICE_FDIR_NUM_RX_DESC;
 	rxq->queue_id = ICE_FDIR_QUEUE_ID;
 	rxq->reg_idx = pf->fdir.fdir_vsi->base_queue;
-	rxq->vsi = pf->fdir.fdir_vsi;
+	rxq->ice_vsi = pf->fdir.fdir_vsi;
 
-	rxq->rx_ring_dma = rz->iova;
+	rxq->rx_ring_phys_addr = rz->iova;
 	memset(rz->addr, 0, ICE_FDIR_NUM_RX_DESC *
 	       sizeof(union ice_32byte_rx_desc));
-	rxq->rx_ring = (union ice_rx_flex_desc *)rz->addr;
+	ICE_RX_RING(rxq) = (union ice_rx_flex_desc *)rz->addr;
 
 	/*
 	 * Don't need to allocate software ring and reset for the fdir
@@ -2523,12 +2523,12 @@ ice_recv_pkts(void *rx_queue,
 	      struct rte_mbuf **rx_pkts,
 	      uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
-	volatile union ice_rx_flex_desc *rx_ring = rxq->rx_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ice_rx_flex_desc *rx_ring = ICE_RX_RING(rxq);
 	volatile union ice_rx_flex_desc *rxdp;
 	union ice_rx_flex_desc rxd;
-	struct ice_rx_entry *sw_ring = rxq->sw_ring;
-	struct ice_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *nmb; /* new allocated mbuf */
 	struct rte_mbuf *nmb_pay; /* new allocated payload mbuf */
 	struct rte_mbuf *rxm; /* pointer to store old mbuf in SW ring */
@@ -2540,13 +2540,13 @@ ice_recv_pkts(void *rx_queue,
 	uint16_t rx_stat_err0;
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -2567,7 +2567,7 @@ ice_recv_pkts(void *rx_queue,
 		/* allocate header mbuf */
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
+			rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
 			break;
 		}
 
@@ -2594,7 +2594,7 @@ ice_recv_pkts(void *rx_queue,
 			/* allocate payload mbuf */
 			nmb_pay = rte_mbuf_raw_alloc(rxq->rxseg[1].mp);
 			if (unlikely(!nmb_pay)) {
-				rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
+				rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
 				rxe->mbuf = NULL;
 				nb_hold--;
 				if (unlikely(rx_id == 0))
@@ -3472,7 +3472,7 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 	struct ice_adapter *ad =
 		ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 #ifdef RTE_ARCH_X86
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int i;
 	int rx_check_ret = -1;
 
@@ -4634,7 +4634,7 @@ ice_set_default_ptype_table(struct rte_eth_dev *dev)
  * tx queue
  */
 static inline int
-ice_check_fdir_programming_status(struct ice_rx_queue *rxq)
+ice_check_fdir_programming_status(struct ci_rx_queue *rxq)
 {
 	volatile union ice_32byte_rx_desc *rxdp;
 	uint64_t qword1;
@@ -4644,7 +4644,7 @@ ice_check_fdir_programming_status(struct ice_rx_queue *rxq)
 	int ret = -EAGAIN;
 
 	rxdp = (volatile union ice_32byte_rx_desc *)
-		(&rxq->rx_ring[rxq->rx_tail]);
+			ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
 	rx_status = (qword1 & ICE_RXD_QW1_STATUS_M)
 			>> ICE_RXD_QW1_STATUS_S;
@@ -4689,7 +4689,7 @@ int
 ice_fdir_programming(struct ice_pf *pf, struct ice_fltr_desc *fdir_desc)
 {
 	struct ci_tx_queue *txq = pf->fdir.txq;
-	struct ice_rx_queue *rxq = pf->fdir.rxq;
+	struct ci_rx_queue *rxq = pf->fdir.rxq;
 	volatile struct ice_fltr_desc *fdirdp;
 	volatile struct ice_tx_desc *txdp;
 	uint32_t td_cmd;
diff --git a/drivers/net/intel/ice/ice_rxtx.h b/drivers/net/intel/ice/ice_rxtx.h
index 276d40b57f..1a39770d7d 100644
--- a/drivers/net/intel/ice/ice_rxtx.h
+++ b/drivers/net/intel/ice/ice_rxtx.h
@@ -5,6 +5,7 @@
 #ifndef _ICE_RXTX_H_
 #define _ICE_RXTX_H_
 
+#include "../common/rx.h"
 #include "../common/tx.h"
 #include "ice_ethdev.h"
 
@@ -14,21 +15,28 @@
 #define ICE_DMA_MEM_ALIGN    4096
 #define ICE_RING_BASE_ALIGN  128
 
-#define ICE_RX_MAX_BURST 32
+#define ICE_RX_MAX_BURST CI_RX_MAX_BURST
 #define ICE_TX_MAX_BURST 32
 
 /* Maximal number of segments to split. */
-#define ICE_RX_MAX_NSEG 2
+#define ICE_RX_MAX_NSEG CI_RX_MAX_NSEG
 
 #define ICE_CHK_Q_ENA_COUNT        100
 #define ICE_CHK_Q_ENA_INTERVAL_US  100
 
 #ifdef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 #define ice_rx_flex_desc ice_16b_rx_flex_desc
+#define ICE_RX_RING(rxq) \
+	((rxq)->ice_rx_16b_ring)
 #else
 #define ice_rx_flex_desc ice_32b_rx_flex_desc
+#define ICE_RX_RING(rxq) \
+	((rxq)->ice_rx_32b_ring)
 #endif
 
+#define ICE_RX_RING_PTR(rxq, entry) \
+	(ICE_RX_RING(rxq) + (entry))
+
 #define ICE_SUPPORT_CHAIN_NUM 5
 
 #define ICE_TD_CMD                      ICE_TX_DESC_CMD_EOP
@@ -78,74 +86,16 @@ extern int ice_timestamp_dynfield_offset;
 
 #define ICE_TX_MTU_SEG_MAX	8
 
-typedef void (*ice_rx_release_mbufs_t)(struct ice_rx_queue *rxq);
-typedef void (*ice_rxd_to_pkt_fields_t)(struct ice_rx_queue *rxq,
+typedef void (*ice_rxd_to_pkt_fields_t)(struct ci_rx_queue *rxq,
 					struct rte_mbuf *mb,
 					volatile union ice_rx_flex_desc *rxdp);
 
-struct ice_rx_entry {
-	struct rte_mbuf *mbuf;
-};
-
 enum ice_rx_dtype {
 	ICE_RX_DTYPE_NO_SPLIT       = 0,
 	ICE_RX_DTYPE_HEADER_SPLIT   = 1,
 	ICE_RX_DTYPE_SPLIT_ALWAYS   = 2,
 };
 
-struct ice_rx_queue {
-	struct rte_mempool *mp; /* mbuf pool to populate RX ring */
-	volatile union ice_rx_flex_desc *rx_ring;/* RX ring virtual address */
-	rte_iova_t rx_ring_dma; /* RX ring DMA address */
-	struct ice_rx_entry *sw_ring; /* address of RX soft ring */
-	uint16_t nb_rx_desc; /* number of RX descriptors */
-	uint16_t rx_free_thresh; /* max free RX desc to hold */
-	uint16_t rx_tail; /* current value of tail */
-	uint16_t nb_rx_hold; /* number of held free RX desc */
-	struct rte_mbuf *pkt_first_seg; /**< first segment of current packet */
-	struct rte_mbuf *pkt_last_seg; /**< last segment of current packet */
-	uint16_t rx_nb_avail; /**< number of staged packets ready */
-	uint16_t rx_next_avail; /**< index of next staged packets */
-	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	struct rte_mbuf fake_mbuf; /**< dummy mbuf */
-	struct rte_mbuf *rx_stage[ICE_RX_MAX_BURST * 2];
-
-	uint16_t rxrearm_nb;	/**< number of remaining to be re-armed */
-	uint16_t rxrearm_start;	/**< the idx we start the re-arming from */
-	uint64_t mbuf_initializer; /**< value to init mbufs */
-
-	uint16_t port_id; /* device port ID */
-	uint8_t crc_len; /* 0 if CRC stripped, 4 otherwise */
-	uint8_t fdir_enabled; /* 0 if FDIR disabled, 1 when enabled */
-	uint16_t queue_id; /* RX queue index */
-	uint16_t reg_idx; /* RX queue register index */
-	uint8_t drop_en; /* if not 0, set register bit */
-	volatile uint8_t *qrx_tail; /* register address of tail */
-	struct ice_vsi *vsi; /* the VSI this queue belongs to */
-	uint16_t rx_buf_len; /* The packet buffer size */
-	uint16_t rx_hdr_len; /* The header buffer size */
-	uint16_t max_pkt_len; /* Maximum packet length */
-	bool q_set; /* indicate if rx queue has been configured */
-	bool rx_deferred_start; /* don't start this queue in dev start */
-	uint8_t proto_xtr; /* Protocol extraction from flexible descriptor */
-	int xtr_field_offs; /*Protocol extraction matedata offset*/
-	uint64_t xtr_ol_flag; /* Protocol extraction offload flag */
-	uint32_t rxdid; /* Receive Flex Descriptor profile ID */
-	ice_rx_release_mbufs_t rx_rel_mbufs;
-	uint64_t offloads;
-	uint32_t time_high;
-	uint32_t hw_register_set;
-	const struct rte_memzone *mz;
-	uint32_t hw_time_high; /* high 32 bits of timestamp */
-	uint32_t hw_time_low; /* low 32 bits of timestamp */
-	uint64_t hw_time_update; /* SW time of HW record updating */
-	struct ice_rx_entry *sw_split_buf;
-	/* address of temp buffer for RX split mbufs */
-	struct rte_eth_rxseg_split rxseg[ICE_RX_MAX_NSEG];
-	uint32_t rxseg_nb;
-	bool ts_enable; /* if rxq timestamp is enabled */
-};
-
 /* Offload features */
 union ice_tx_offload {
 	uint64_t data;
@@ -249,12 +199,12 @@ int ice_tx_descriptor_status(void *tx_queue, uint16_t offset);
 void ice_set_default_ptype_table(struct rte_eth_dev *dev);
 const uint32_t *ice_dev_supported_ptypes_get(struct rte_eth_dev *dev,
 					     size_t *no_of_elements);
-void ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq,
+void ice_select_rxd_to_pkt_fields_handler(struct ci_rx_queue *rxq,
 					  uint32_t rxdid);
 
 int ice_rx_vec_dev_check(struct rte_eth_dev *dev);
 int ice_tx_vec_dev_check(struct rte_eth_dev *dev);
-int ice_rxq_vec_setup(struct ice_rx_queue *rxq);
+int ice_rxq_vec_setup(struct ci_rx_queue *rxq);
 int ice_txq_vec_setup(struct ci_tx_queue *txq);
 uint16_t ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   uint16_t nb_pkts);
@@ -299,7 +249,7 @@ int ice_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc);
 #define FDIR_PARSING_ENABLE_PER_QUEUE(ad, on) do { \
 	int i; \
 	for (i = 0; i < (ad)->pf.dev_data->nb_rx_queues; i++) { \
-		struct ice_rx_queue *rxq = (ad)->pf.dev_data->rx_queues[i]; \
+		struct ci_rx_queue *rxq = (ad)->pf.dev_data->rx_queues[i]; \
 		if (!rxq) \
 			continue; \
 		rxq->fdir_enabled = on; \
diff --git a/drivers/net/intel/ice/ice_rxtx_common_avx.h b/drivers/net/intel/ice/ice_rxtx_common_avx.h
index c62e60c70e..7209c902db 100644
--- a/drivers/net/intel/ice/ice_rxtx_common_avx.h
+++ b/drivers/net/intel/ice/ice_rxtx_common_avx.h
@@ -9,14 +9,14 @@
 
 #ifdef __AVX2__
 static __rte_always_inline void
-ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
+ice_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
index 0c54b325c6..f4555369a2 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
@@ -8,7 +8,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-ice_rxq_rearm(struct ice_rx_queue *rxq)
+ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	ice_rxq_rearm_common(rxq, false);
 }
@@ -33,17 +33,17 @@ ice_flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_ice_recv_raw_pkts_vec_avx2(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts, uint8_t *split_packet,
 			    bool offload)
 {
 #define ICE_DESCS_PER_LOOP_AVX 8
 
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct ice_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union ice_rx_flex_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ice_rx_flex_desc *rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 
 	rte_prefetch0(rxdp);
@@ -445,7 +445,7 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			 * needs to load 2nd 16B of each desc for RSS hash parsing,
 			 * will cause performance drop to get into this context.
 			 */
-			if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+			if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
 					RTE_ETH_RX_OFFLOAD_RSS_HASH) {
 				/* load bottom half of every 32B desc */
 				const __m128i raw_desc_bh7 = _mm_load_si128
@@ -694,7 +694,7 @@ static __rte_always_inline uint16_t
 ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				  uint16_t nb_pkts, bool offload)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
index bd49be07c9..6eea74d703 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
@@ -10,7 +10,7 @@
 #define ICE_DESCS_PER_LOOP_AVX 8
 
 static __rte_always_inline void
-ice_rxq_rearm(struct ice_rx_queue *rxq)
+ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	ice_rxq_rearm_common(rxq, true);
 }
@@ -35,17 +35,17 @@ ice_flex_rxd_to_fdir_flags_vec_avx512(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
+_ice_recv_raw_pkts_vec_avx512(struct ci_rx_queue *rxq,
 			      struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts,
 			      uint8_t *split_packet,
 			      bool do_offload)
 {
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct ice_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union ice_rx_flex_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ice_rx_flex_desc *rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -467,7 +467,7 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
 			 * needs to load 2nd 16B of each desc for RSS hash parsing,
 			 * will cause performance drop to get into this context.
 			 */
-			if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+			if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
 					RTE_ETH_RX_OFFLOAD_RSS_HASH) {
 				/* load bottom half of every 32B desc */
 				const __m128i raw_desc_bh7 = _mm_load_si128
@@ -723,7 +723,7 @@ static uint16_t
 ice_recv_scattered_burst_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				    uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -765,7 +765,7 @@ ice_recv_scattered_burst_vec_avx512_offload(void *rx_queue,
 					    struct rte_mbuf **rx_pkts,
 					    uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_common.h b/drivers/net/intel/ice/ice_rxtx_vec_common.h
index 7933c26366..9430a99ba5 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_common.h
+++ b/drivers/net/intel/ice/ice_rxtx_vec_common.h
@@ -17,7 +17,7 @@ ice_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
 }
 
 static inline void
-_ice_rx_queue_release_mbufs_vec(struct ice_rx_queue *rxq)
+_ice_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	const unsigned int mask = rxq->nb_rx_desc - 1;
 	unsigned int i;
@@ -79,7 +79,7 @@ _ice_rx_queue_release_mbufs_vec(struct ice_rx_queue *rxq)
 #define ICE_VECTOR_OFFLOAD_PATH	1
 
 static inline int
-ice_rx_vec_queue_default(struct ice_rx_queue *rxq)
+ice_rx_vec_queue_default(struct ci_rx_queue *rxq)
 {
 	if (!rxq)
 		return -1;
@@ -119,7 +119,7 @@ static inline int
 ice_rx_vec_dev_check_default(struct rte_eth_dev *dev)
 {
 	int i;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int ret = 0;
 	int result = 0;
 
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_sse.c b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
index 97f05ba45e..dc9d37226a 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
@@ -26,18 +26,18 @@ ice_flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 }
 
 static inline void
-ice_rxq_rearm(struct ice_rx_queue *rxq)
+ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 					  RTE_PKTMBUF_HEADROOM);
 	__m128i dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -105,7 +105,7 @@ ice_rxq_rearm(struct ice_rx_queue *rxq)
 }
 
 static inline void
-ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
+ice_rx_desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4],
 			 struct rte_mbuf **rx_pkts)
 {
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -301,15 +301,15 @@ ice_rx_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a ICE_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_ice_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 	__m128i crc_adjust = _mm_set_epi16
 				(0, 0, 0,       /* ignore non-length fields */
 				 -rxq->crc_len, /* sub crc on data_len */
@@ -361,7 +361,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -482,7 +482,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		 * needs to load 2nd 16B of each desc for RSS hash parsing,
 		 * will cause performance drop to get into this context.
 		 */
-		if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+		if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
 				RTE_ETH_RX_OFFLOAD_RSS_HASH) {
 			/* load bottom half of every 32B desc */
 			const __m128i raw_desc_bh3 =
@@ -608,7 +608,7 @@ static uint16_t
 ice_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -779,7 +779,7 @@ ice_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 int __rte_cold
-ice_rxq_vec_setup(struct ice_rx_queue *rxq)
+ice_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	if (!rxq)
 		return -1;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v3 06/13] net/iavf: use the common Rx queue structure
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (3 preceding siblings ...)
  2025-05-12 12:54   ` [PATCH v3 05/13] net/ice: " Anatoly Burakov
@ 2025-05-12 12:54   ` Anatoly Burakov
  2025-05-15 10:59     ` Bruce Richardson
  2025-05-12 12:54   ` [PATCH v3 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
                     ` (8 subsequent siblings)
  13 siblings, 1 reply; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 12:54 UTC (permalink / raw)
  To: dev, Bruce Richardson, Vladimir Medvedkin, Ian Stokes

Make the iavf driver use the new common Rx queue structure.

Because the iavf driver supports both 16-byte and 32-byte descriptor
formats (controlled by the RTE_LIBRTE_IAVF_16BYTE_RX_DESC define), the
common queue structure has to take that into account, so it now holds
ring pointers for both formats, while the actual descriptor format is
picked by iavf at compile time using the above macro. Direct access to
the Rx ring is replaced with macro accessors that take the descriptor
size into account.
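
Apart from the descriptor-size accessors (the real ones here are
IAVF_RX_RING() and IAVF_RX_RING_PTR(), introduced in iavf_rxtx.h below),
the move to the common structure also turns the software ring from a bare
array of mbuf pointers into an array of ci_rx_entry elements, so every
access gains a ".mbuf" step. The following is a simplified, self-contained
sketch of that pattern, mirroring the release_rxq_mbufs() conversion in
the diff; the types and the free_seg callback are stand-ins, not real
DPDK API.

	#include <stddef.h>

	/* stand-ins for struct rte_mbuf and struct ci_rx_entry */
	struct mbuf { void *data; };
	struct rx_entry { struct mbuf *mbuf; };

	/* the queue used to keep a bare array of mbuf pointers; the common
	 * structure keeps entries, hence the extra ".mbuf" on each access
	 */
	static void release_mbufs(struct rx_entry *sw_ring, unsigned int n,
				  void (*free_seg)(struct mbuf *))
	{
		for (unsigned int i = 0; i < n; i++) {
			if (sw_ring[i].mbuf) {	/* was: if (sw_ring[i]) */
				free_seg(sw_ring[i].mbuf);
				sw_ring[i].mbuf = NULL;
			}
		}
	}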

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v2:
    - Fix compile issues for Arm

 drivers/net/intel/common/rx.h                 |  12 ++
 drivers/net/intel/iavf/iavf.h                 |   4 +-
 drivers/net/intel/iavf/iavf_ethdev.c          |  12 +-
 drivers/net/intel/iavf/iavf_rxtx.c            | 192 +++++++++---------
 drivers/net/intel/iavf/iavf_rxtx.h            |  76 ++-----
 drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   |  24 +--
 drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c |  22 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_common.h |  27 ++-
 drivers/net/intel/iavf/iavf_rxtx_vec_neon.c   |  30 +--
 drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    |  46 ++---
 drivers/net/intel/iavf/iavf_vchnl.c           |   6 +-
 11 files changed, 207 insertions(+), 244 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index 9a691971bc..2d9328ae89 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -38,6 +38,8 @@ struct ci_rx_queue {
 		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
 		volatile union ice_16b_rx_flex_desc *ice_rx_16b_ring;
 		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
+		volatile union iavf_16byte_rx_desc *iavf_rx_16b_ring;
+		volatile union iavf_32byte_rx_desc *iavf_rx_32b_ring;
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
@@ -80,6 +82,7 @@ struct ci_rx_queue {
 	union { /* the VSI this queue belongs to */
 		struct i40e_vsi *i40e_vsi;
 		struct ice_vsi *ice_vsi;
+		struct iavf_vsi *iavf_vsi;
 	};
 	const struct rte_memzone *mz;
 	union {
@@ -108,6 +111,15 @@ struct ci_rx_queue {
 			uint32_t hw_time_low; /* low 32 bits of timestamp */
 			bool ts_enable; /* if rxq timestamp is enabled */
 		};
+		struct { /* iavf specific values */
+			const struct iavf_rxq_ops *ops; /**< queue ops */
+			struct iavf_rx_queue_stats *stats; /**< per-queue stats */
+			uint64_t phc_time; /**< HW timestamp */
+			uint8_t rel_mbufs_type; /**< type of release mbuf function */
+			uint8_t rx_flags; /**< Rx VLAN tag location flags */
+#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG1     BIT(0)
+#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2   BIT(1)
+		};
 	};
 };
 
diff --git a/drivers/net/intel/iavf/iavf.h b/drivers/net/intel/iavf/iavf.h
index 97e6b243fb..f81c939c96 100644
--- a/drivers/net/intel/iavf/iavf.h
+++ b/drivers/net/intel/iavf/iavf.h
@@ -97,7 +97,7 @@
 #define IAVF_L2TPV2_FLAGS_LEN	0x4000
 
 struct iavf_adapter;
-struct iavf_rx_queue;
+struct ci_rx_queue;
 struct ci_tx_queue;
 
 
@@ -555,7 +555,7 @@ int iavf_ipsec_crypto_request(struct iavf_adapter *adapter,
 		uint8_t *resp_msg, size_t resp_msg_len);
 extern const struct rte_tm_ops iavf_tm_ops;
 int iavf_get_ptp_cap(struct iavf_adapter *adapter);
-int iavf_get_phc_time(struct iavf_rx_queue *rxq);
+int iavf_get_phc_time(struct ci_rx_queue *rxq);
 int iavf_flow_sub(struct iavf_adapter *adapter,
 		  struct iavf_fsub_conf *filter);
 int iavf_flow_unsub(struct iavf_adapter *adapter,
diff --git a/drivers/net/intel/iavf/iavf_ethdev.c b/drivers/net/intel/iavf/iavf_ethdev.c
index 5babd587b3..4e843a3532 100644
--- a/drivers/net/intel/iavf/iavf_ethdev.c
+++ b/drivers/net/intel/iavf/iavf_ethdev.c
@@ -728,7 +728,7 @@ iavf_dev_configure(struct rte_eth_dev *dev)
 }
 
 static int
-iavf_init_rxq(struct rte_eth_dev *dev, struct iavf_rx_queue *rxq)
+iavf_init_rxq(struct rte_eth_dev *dev, struct ci_rx_queue *rxq)
 {
 	struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct rte_eth_dev_data *dev_data = dev->data;
@@ -779,8 +779,8 @@ iavf_init_rxq(struct rte_eth_dev *dev, struct iavf_rx_queue *rxq)
 static int
 iavf_init_queues(struct rte_eth_dev *dev)
 {
-	struct iavf_rx_queue **rxq =
-		(struct iavf_rx_queue **)dev->data->rx_queues;
+	struct ci_rx_queue **rxq =
+		(struct ci_rx_queue **)dev->data->rx_queues;
 	int i, ret = IAVF_SUCCESS;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -955,7 +955,7 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev,
 static int
 iavf_start_queues(struct rte_eth_dev *dev)
 {
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int i;
 	uint16_t nb_txq, nb_rxq;
@@ -1867,9 +1867,9 @@ iavf_dev_update_ipsec_xstats(struct rte_eth_dev *ethdev,
 {
 	uint16_t idx;
 	for (idx = 0; idx < ethdev->data->nb_rx_queues; idx++) {
-		struct iavf_rx_queue *rxq;
+		struct ci_rx_queue *rxq;
 		struct iavf_ipsec_crypto_stats *stats;
-		rxq = (struct iavf_rx_queue *)ethdev->data->rx_queues[idx];
+		rxq = (struct ci_rx_queue *)ethdev->data->rx_queues[idx];
 		stats = &rxq->stats->ipsec_crypto;
 		ips->icount += stats->icount;
 		ips->ibytes += stats->ibytes;
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index d23d2df807..a9ce4b55d9 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -128,12 +128,12 @@ iavf_monitor_callback(const uint64_t value,
 int
 iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile union iavf_rx_desc *rxdp;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = IAVF_RX_RING_PTR(rxq, desc);
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.qword1.status_error_len;
 
@@ -222,7 +222,7 @@ check_tx_vec_allow(struct ci_tx_queue *txq)
 }
 
 static inline bool
-check_rx_bulk_allow(struct iavf_rx_queue *rxq)
+check_rx_bulk_allow(struct ci_rx_queue *rxq)
 {
 	int ret = true;
 
@@ -243,7 +243,7 @@ check_rx_bulk_allow(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-reset_rx_queue(struct iavf_rx_queue *rxq)
+reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	uint16_t len;
 	uint32_t i;
@@ -254,12 +254,12 @@ reset_rx_queue(struct iavf_rx_queue *rxq)
 	len = rxq->nb_rx_desc + IAVF_RX_MAX_BURST;
 
 	for (i = 0; i < len * sizeof(union iavf_rx_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)IAVF_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 
 	for (i = 0; i < IAVF_RX_MAX_BURST; i++)
-		rxq->sw_ring[rxq->nb_rx_desc + i] = &rxq->fake_mbuf;
+		rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;
 
 	/* for rx bulk */
 	rxq->rx_nb_avail = 0;
@@ -315,7 +315,7 @@ reset_tx_queue(struct ci_tx_queue *txq)
 }
 
 static int
-alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
+alloc_rxq_mbufs(struct ci_rx_queue *rxq)
 {
 	volatile union iavf_rx_desc *rxd;
 	struct rte_mbuf *mbuf = NULL;
@@ -326,8 +326,8 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		mbuf = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!mbuf)) {
 			for (j = 0; j < i; j++) {
-				rte_pktmbuf_free_seg(rxq->sw_ring[j]);
-				rxq->sw_ring[j] = NULL;
+				rte_pktmbuf_free_seg(rxq->sw_ring[j].mbuf);
+				rxq->sw_ring[j].mbuf = NULL;
 			}
 			PMD_DRV_LOG(ERR, "Failed to allocate mbuf for RX");
 			return -ENOMEM;
@@ -342,7 +342,7 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
-		rxd = &rxq->rx_ring[i];
+		rxd = IAVF_RX_RING_PTR(rxq, i);
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
@@ -350,14 +350,14 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		rxd->read.rsvd2 = 0;
 #endif
 
-		rxq->sw_ring[i] = mbuf;
+		rxq->sw_ring[i].mbuf = mbuf;
 	}
 
 	return 0;
 }
 
 static inline void
-release_rxq_mbufs(struct iavf_rx_queue *rxq)
+release_rxq_mbufs(struct ci_rx_queue *rxq)
 {
 	uint16_t i;
 
@@ -365,9 +365,9 @@ release_rxq_mbufs(struct iavf_rx_queue *rxq)
 		return;
 
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
-		if (rxq->sw_ring[i]) {
-			rte_pktmbuf_free_seg(rxq->sw_ring[i]);
-			rxq->sw_ring[i] = NULL;
+		if (rxq->sw_ring[i].mbuf) {
+			rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
+			rxq->sw_ring[i].mbuf = NULL;
 		}
 	}
 
@@ -395,7 +395,7 @@ struct iavf_rxq_ops iavf_rxq_release_mbufs_ops[] = {
 };
 
 static inline void
-iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct iavf_rx_queue *rxq,
+iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ci_rx_queue *rxq,
 				    struct rte_mbuf *mb,
 				    volatile union iavf_rx_flex_desc *rxdp)
 {
@@ -420,7 +420,7 @@ iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct iavf_rx_queue *rxq,
 }
 
 static inline void
-iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct iavf_rx_queue *rxq,
+iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct ci_rx_queue *rxq,
 				       struct rte_mbuf *mb,
 				       volatile union iavf_rx_flex_desc *rxdp)
 {
@@ -462,7 +462,7 @@ iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct iavf_rx_queue *rxq,
 }
 
 static inline void
-iavf_rxd_to_pkt_fields_by_comms_aux_v2(struct iavf_rx_queue *rxq,
+iavf_rxd_to_pkt_fields_by_comms_aux_v2(struct ci_rx_queue *rxq,
 				       struct rte_mbuf *mb,
 				       volatile union iavf_rx_flex_desc *rxdp)
 {
@@ -517,7 +517,7 @@ iavf_rxd_to_pkt_fields_t rxd_to_pkt_fields_ops[IAVF_RXDID_LAST + 1] = {
 };
 
 static void
-iavf_select_rxd_to_pkt_fields_handler(struct iavf_rx_queue *rxq, uint32_t rxdid)
+iavf_select_rxd_to_pkt_fields_handler(struct ci_rx_queue *rxq, uint32_t rxdid)
 {
 	rxq->rxdid = rxdid;
 
@@ -572,7 +572,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	struct iavf_info *vf =
 		IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 	struct iavf_vsi *vsi = &vf->vsi;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *mz;
 	uint32_t ring_size;
 	uint8_t proto_xtr;
@@ -610,7 +610,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 
 	/* Allocate the rx queue data structure */
 	rxq = rte_zmalloc_socket("iavf rxq",
-				 sizeof(struct iavf_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 socket_id);
 	if (!rxq) {
@@ -668,7 +668,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	rxq->port_id = dev->data->port_id;
 	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 	rxq->rx_hdr_len = 0;
-	rxq->vsi = vsi;
+	rxq->iavf_vsi = vsi;
 	rxq->offloads = offloads;
 
 	if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
@@ -713,7 +713,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	/* Zero all the descriptors in the ring. */
 	memset(mz->addr, 0, ring_size);
 	rxq->rx_ring_phys_addr = mz->iova;
-	rxq->rx_ring = (union iavf_rx_desc *)mz->addr;
+	IAVF_RX_RING(rxq) = (union iavf_rx_desc *)mz->addr;
 
 	rxq->mz = mz;
 	reset_rx_queue(rxq);
@@ -905,7 +905,7 @@ iavf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 	struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err = 0;
 
 	PMD_DRV_FUNC_TRACE();
@@ -997,7 +997,7 @@ iavf_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	struct iavf_adapter *adapter =
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 
 	PMD_DRV_FUNC_TRACE();
@@ -1060,7 +1060,7 @@ iavf_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 void
 iavf_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 {
-	struct iavf_rx_queue *q = dev->data->rx_queues[qid];
+	struct ci_rx_queue *q = dev->data->rx_queues[qid];
 
 	if (!q)
 		return;
@@ -1089,7 +1089,7 @@ iavf_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 static void
 iavf_reset_queues(struct rte_eth_dev *dev)
 {
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int i;
 
@@ -1375,7 +1375,7 @@ iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
  * from the hardware point of view.
  */
 static inline void
-iavf_update_rx_tail(struct iavf_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
+iavf_update_rx_tail(struct ci_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
 {
 	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
 
@@ -1397,9 +1397,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	volatile union iavf_rx_desc *rx_ring;
 	volatile union iavf_rx_desc *rxdp;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	union iavf_rx_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_eth_dev *dev;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
@@ -1416,8 +1416,8 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
-	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	rx_ring = IAVF_RX_RING(rxq);
+	ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -1442,13 +1442,13 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (unlikely(rx_id == rxq->nb_rx_desc))
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1456,9 +1456,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 		rxdp->read.hdr_addr = 0;
@@ -1506,9 +1506,9 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 {
 	volatile union iavf_rx_desc *rx_ring;
 	volatile union iavf_rx_flex_desc *rxdp;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	union iavf_rx_flex_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_eth_dev *dev;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
@@ -1525,8 +1525,8 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
-	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	rx_ring = IAVF_RX_RING(rxq);
+	ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -1559,13 +1559,13 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (unlikely(rx_id == rxq->nb_rx_desc))
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1573,9 +1573,9 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 		rxdp->read.hdr_addr = 0;
@@ -1629,9 +1629,9 @@ uint16_t
 iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 				  uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	union iavf_rx_flex_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb, *rxm;
@@ -1643,9 +1643,9 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint64_t pkt_flags;
 	uint64_t ts_ns;
 
-	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union iavf_rx_desc *rx_ring = IAVF_RX_RING(rxq);
 	volatile union iavf_rx_flex_desc *rxdp;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -1678,13 +1678,13 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (rx_id == rxq->nb_rx_desc)
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1692,10 +1692,10 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
 
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 
@@ -1806,9 +1806,9 @@ uint16_t
 iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	union iavf_rx_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb, *rxm;
@@ -1820,9 +1820,9 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
 
-	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union iavf_rx_desc *rx_ring = IAVF_RX_RING(rxq);
 	volatile union iavf_rx_desc *rxdp;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -1847,13 +1847,13 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (rx_id == rxq->nb_rx_desc)
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1861,10 +1861,10 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
 
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 
@@ -1963,12 +1963,12 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 #define IAVF_LOOK_AHEAD 8
 static inline int
-iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
+iavf_rx_scan_hw_ring_flex_rxd(struct ci_rx_queue *rxq,
 			    struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts)
 {
 	volatile union iavf_rx_flex_desc *rxdp;
-	struct rte_mbuf **rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t stat_err0;
 	uint16_t pkt_len;
@@ -1976,10 +1976,10 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 	int32_t i, j, nb_rx = 0;
 	int32_t nb_staged = 0;
 	uint64_t pkt_flags;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 	uint64_t ts_ns;
 
-	rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail];
+	rxdp = (volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -2038,7 +2038,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 					  rxq->rx_tail +
 					  i * IAVF_LOOK_AHEAD + j);
 
-			mb = rxep[j];
+			mb = rxep[j].mbuf;
 			pkt_len = (rte_le_to_cpu_16(rxdp[j].wb.pkt_len) &
 				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
 			mb->data_len = pkt_len;
@@ -2072,11 +2072,11 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 
 			/* Put up to nb_pkts directly into buffers */
 			if ((i + j) < nb_pkts) {
-				rx_pkts[i + j] = rxep[j];
+				rx_pkts[i + j] = rxep[j].mbuf;
 				nb_rx++;
 			} else {
 				/* Stage excess pkts received */
-				rxq->rx_stage[nb_staged] = rxep[j];
+				rxq->rx_stage[nb_staged] = rxep[j].mbuf;
 				nb_staged++;
 			}
 		}
@@ -2090,16 +2090,16 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 
 	/* Clear software ring entries */
 	for (i = 0; i < (nb_rx + nb_staged); i++)
-		rxq->sw_ring[rxq->rx_tail + i] = NULL;
+		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
 
 	return nb_rx;
 }
 
 static inline int
-iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+iavf_rx_scan_hw_ring(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t pkt_len;
 	uint64_t qword1;
@@ -2108,9 +2108,9 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 	int32_t i, j, nb_rx = 0;
 	int32_t nb_staged = 0;
 	uint64_t pkt_flags;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
@@ -2164,7 +2164,7 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 			IAVF_DUMP_RX_DESC(rxq, &rxdp[j],
 					 rxq->rx_tail + i * IAVF_LOOK_AHEAD + j);
 
-			mb = rxep[j];
+			mb = rxep[j].mbuf;
 			qword1 = rte_le_to_cpu_64
 					(rxdp[j].wb.qword1.status_error_len);
 			pkt_len = ((qword1 & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >>
@@ -2190,10 +2190,10 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 
 			/* Put up to nb_pkts directly into buffers */
 			if ((i + j) < nb_pkts) {
-				rx_pkts[i + j] = rxep[j];
+				rx_pkts[i + j] = rxep[j].mbuf;
 				nb_rx++;
 			} else { /* Stage excess pkts received */
-				rxq->rx_stage[nb_staged] = rxep[j];
+				rxq->rx_stage[nb_staged] = rxep[j].mbuf;
 				nb_staged++;
 			}
 		}
@@ -2207,13 +2207,13 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 
 	/* Clear software ring entries */
 	for (i = 0; i < (nb_rx + nb_staged); i++)
-		rxq->sw_ring[rxq->rx_tail + i] = NULL;
+		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
 
 	return nb_rx;
 }
 
 static inline uint16_t
-iavf_rx_fill_from_stage(struct iavf_rx_queue *rxq,
+iavf_rx_fill_from_stage(struct ci_rx_queue *rxq,
 		       struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts)
 {
@@ -2232,10 +2232,10 @@ iavf_rx_fill_from_stage(struct iavf_rx_queue *rxq,
 }
 
 static inline int
-iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
+iavf_rx_alloc_bufs(struct ci_rx_queue *rxq)
 {
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx, i;
 	uint64_t dma_addr;
@@ -2252,13 +2252,13 @@ iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
 		return -ENOMEM;
 	}
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = IAVF_RX_RING_PTR(rxq, alloc_idx);
 	for (i = 0; i < rxq->rx_free_thresh; i++) {
 		if (likely(i < (rxq->rx_free_thresh - 1)))
 			/* Prefetch next mbuf */
-			rte_prefetch0(rxep[i + 1]);
+			rte_prefetch0(rxep[i + 1].mbuf);
 
-		mb = rxep[i];
+		mb = rxep[i].mbuf;
 		rte_mbuf_refcnt_set(mb, 1);
 		mb->next = NULL;
 		mb->data_off = RTE_PKTMBUF_HEADROOM;
@@ -2284,7 +2284,7 @@ iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
 static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = (struct iavf_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	uint16_t nb_rx = 0;
 
 	if (!nb_pkts)
@@ -2312,11 +2312,11 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 			rxq->rx_tail = (uint16_t)(rxq->rx_tail - (nb_rx + nb_staged));
 			for (i = 0, j = rxq->rx_tail; i < nb_rx; i++, j++) {
-				rxq->sw_ring[j] = rx_pkts[i];
+				rxq->sw_ring[j].mbuf = rx_pkts[i];
 				rx_pkts[i] = NULL;
 			}
 			for (i = 0, j = rxq->rx_tail + nb_rx; i < nb_staged; i++, j++) {
-				rxq->sw_ring[j] = rxq->rx_stage[i];
+				rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
 				rx_pkts[i] = NULL;
 			}
 
@@ -3843,13 +3843,13 @@ static uint16_t
 iavf_recv_pkts_no_poll(void *rx_queue, struct rte_mbuf **rx_pkts,
 				uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	enum iavf_rx_burst_type rx_burst_type;
 
-	if (!rxq->vsi || rxq->vsi->adapter->no_poll)
+	if (!rxq->iavf_vsi || rxq->iavf_vsi->adapter->no_poll)
 		return 0;
 
-	rx_burst_type = rxq->vsi->adapter->rx_burst_type;
+	rx_burst_type = rxq->iavf_vsi->adapter->rx_burst_type;
 
 	return iavf_rx_pkt_burst_ops[rx_burst_type].pkt_burst(rx_queue,
 								rx_pkts, nb_pkts);
@@ -3965,7 +3965,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	enum iavf_rx_burst_type rx_burst_type;
 	int no_poll_on_link_down = adapter->devargs.no_poll_on_link_down;
 	int i;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	bool use_flex = true;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -4379,7 +4379,7 @@ void
 iavf_dev_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 		     struct rte_eth_rxq_info *qinfo)
 {
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -4414,11 +4414,11 @@ iavf_dev_rxq_count(void *rx_queue)
 {
 #define IAVF_RXQ_SCAN_INTERVAL 4
 	volatile union iavf_rx_desc *rxdp;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	while ((desc < rxq->nb_rx_desc) &&
 	       ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
@@ -4431,8 +4431,8 @@ iavf_dev_rxq_count(void *rx_queue)
 		desc += IAVF_RXQ_SCAN_INTERVAL;
 		rxdp += IAVF_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
-					desc - rxq->nb_rx_desc]);
+			rxdp = IAVF_RX_RING_PTR(rxq,
+					rxq->rx_tail + desc - rxq->nb_rx_desc);
 	}
 
 	return desc;
@@ -4441,7 +4441,7 @@ iavf_dev_rxq_count(void *rx_queue)
 int
 iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile uint64_t *status;
 	uint64_t mask;
 	uint32_t desc;
@@ -4456,7 +4456,7 @@ iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	status = &rxq->rx_ring[desc].wb.qword1.status_error_len;
+	status = &IAVF_RX_RING_PTR(rxq, desc)->wb.qword1.status_error_len;
 	mask = rte_le_to_cpu_64((1ULL << IAVF_RX_DESC_STATUS_DD_SHIFT)
 		<< IAVF_RXD_QW1_STATUS_SHIFT);
 	if (*status & mask)
diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index 62b5a67c84..c43ddc3c2f 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -17,7 +17,7 @@
 #define IAVF_RING_BASE_ALIGN      128
 
 /* used for Rx Bulk Allocate */
-#define IAVF_RX_MAX_BURST         32
+#define IAVF_RX_MAX_BURST         CI_RX_MAX_BURST
 
 /* Max data buffer size must be 16K - 128 bytes */
 #define IAVF_RX_MAX_DATA_BUF_SIZE (16 * 1024 - 128)
@@ -198,17 +198,24 @@ union iavf_32b_rx_flex_desc {
 #ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 #define iavf_rx_desc iavf_16byte_rx_desc
 #define iavf_rx_flex_desc iavf_16b_rx_flex_desc
+#define IAVF_RX_RING(rxq) \
+	((rxq)->iavf_rx_16b_ring)
 #else
 #define iavf_rx_desc iavf_32byte_rx_desc
 #define iavf_rx_flex_desc iavf_32b_rx_flex_desc
+#define IAVF_RX_RING(rxq) \
+	((rxq)->iavf_rx_32b_ring)
 #endif
 
-typedef void (*iavf_rxd_to_pkt_fields_t)(struct iavf_rx_queue *rxq,
+#define IAVF_RX_RING_PTR(rxq, entry) \
+	(IAVF_RX_RING(rxq) + (entry))
+
+typedef void (*iavf_rxd_to_pkt_fields_t)(struct ci_rx_queue *rxq,
 				struct rte_mbuf *mb,
 				volatile union iavf_rx_flex_desc *rxdp);
 
 struct iavf_rxq_ops {
-	void (*release_mbufs)(struct iavf_rx_queue *rxq);
+	void (*release_mbufs)(struct ci_rx_queue *rxq);
 };
 
 struct iavf_txq_ops {
@@ -221,59 +228,6 @@ struct iavf_rx_queue_stats {
 	struct iavf_ipsec_crypto_stats ipsec_crypto;
 };
 
-/* Structure associated with each Rx queue. */
-struct iavf_rx_queue {
-	struct rte_mempool *mp;       /* mbuf pool to populate Rx ring */
-	const struct rte_memzone *mz; /* memzone for Rx ring */
-	volatile union iavf_rx_desc *rx_ring; /* Rx ring virtual address */
-	uint64_t rx_ring_phys_addr;   /* Rx ring DMA address */
-	struct rte_mbuf **sw_ring;     /* address of SW ring */
-	uint16_t nb_rx_desc;          /* ring length */
-	uint16_t rx_tail;             /* current value of tail */
-	volatile uint8_t *qrx_tail;   /* register address of tail */
-	uint16_t rx_free_thresh;      /* max free RX desc to hold */
-	uint16_t nb_rx_hold;          /* number of held free RX desc */
-	struct rte_mbuf *pkt_first_seg; /* first segment of current packet */
-	struct rte_mbuf *pkt_last_seg;  /* last segment of current packet */
-	struct rte_mbuf fake_mbuf;      /* dummy mbuf */
-	uint8_t rxdid;
-	uint8_t rel_mbufs_type;
-
-	/* used for VPMD */
-	uint16_t rxrearm_nb;       /* number of remaining to be re-armed */
-	uint16_t rxrearm_start;    /* the idx we start the re-arming from */
-	uint64_t mbuf_initializer; /* value to init mbufs */
-
-	/* for rx bulk */
-	uint16_t rx_nb_avail;      /* number of staged packets ready */
-	uint16_t rx_next_avail;    /* index of next staged packets */
-	uint16_t rx_free_trigger;  /* triggers rx buffer allocation */
-	struct rte_mbuf *rx_stage[IAVF_RX_MAX_BURST * 2]; /* store mbuf */
-
-	uint16_t port_id;        /* device port ID */
-	uint8_t crc_len;        /* 0 if CRC stripped, 4 otherwise */
-	uint8_t fdir_enabled;   /* 0 if FDIR disabled, 1 when enabled */
-	uint16_t queue_id;      /* Rx queue index */
-	uint16_t rx_buf_len;    /* The packet buffer size */
-	uint16_t rx_hdr_len;    /* The header buffer size */
-	uint16_t max_pkt_len;   /* Maximum packet length */
-	struct iavf_vsi *vsi; /**< the VSI this queue belongs to */
-
-	bool q_set;             /* if rx queue has been configured */
-	bool rx_deferred_start; /* don't start this queue in dev start */
-	const struct iavf_rxq_ops *ops;
-	uint8_t rx_flags;
-#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG1     BIT(0)
-#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2   BIT(1)
-	uint8_t proto_xtr; /* protocol extraction type */
-	uint64_t xtr_ol_flag;
-		/* flexible descriptor metadata extraction offload flag */
-	struct iavf_rx_queue_stats *stats;
-	uint64_t offloads;
-	uint64_t phc_time;
-	uint64_t hw_time_update;
-};
-
 /* Offload features */
 union iavf_tx_offload {
 	uint64_t data;
@@ -691,7 +645,7 @@ uint16_t iavf_xmit_pkts_vec_avx2_offload(void *tx_queue, struct rte_mbuf **tx_pk
 int iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc);
 int iavf_rx_vec_dev_check(struct rte_eth_dev *dev);
 int iavf_tx_vec_dev_check(struct rte_eth_dev *dev);
-int iavf_rxq_vec_setup(struct iavf_rx_queue *rxq);
+int iavf_rxq_vec_setup(struct ci_rx_queue *rxq);
 int iavf_txq_vec_setup(struct ci_tx_queue *txq);
 uint16_t iavf_recv_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				   uint16_t nb_pkts);
@@ -731,12 +685,12 @@ uint8_t iavf_proto_xtr_type_to_rxdid(uint8_t xtr_type);
 
 void iavf_set_default_ptype_table(struct rte_eth_dev *dev);
 void iavf_tx_queue_release_mbufs_avx512(struct ci_tx_queue *txq);
-void iavf_rx_queue_release_mbufs_sse(struct iavf_rx_queue *rxq);
+void iavf_rx_queue_release_mbufs_sse(struct ci_rx_queue *rxq);
 void iavf_tx_queue_release_mbufs_sse(struct ci_tx_queue *txq);
-void iavf_rx_queue_release_mbufs_neon(struct iavf_rx_queue *rxq);
+void iavf_rx_queue_release_mbufs_neon(struct ci_rx_queue *rxq);
 
 static inline
-void iavf_dump_rx_descriptor(struct iavf_rx_queue *rxq,
+void iavf_dump_rx_descriptor(struct ci_rx_queue *rxq,
 			    const volatile void *desc,
 			    uint16_t rx_id)
 {
@@ -794,7 +748,7 @@ void iavf_dump_tx_descriptor(const struct ci_tx_queue *txq,
 #define FDIR_PROC_ENABLE_PER_QUEUE(ad, on) do { \
 	int i; \
 	for (i = 0; i < (ad)->dev_data->nb_rx_queues; i++) { \
-		struct iavf_rx_queue *rxq = (ad)->dev_data->rx_queues[i]; \
+		struct ci_rx_queue *rxq = (ad)->dev_data->rx_queues[i]; \
 		if (!rxq) \
 			continue; \
 		rxq->fdir_enabled = on; \
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
index 88e35dc3e9..f51fa4acf9 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
@@ -7,7 +7,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	iavf_rxq_rearm_common(rxq, false);
 }
@@ -15,21 +15,19 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 #define PKTLEN_SHIFT     10
 
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx2(struct ci_rx_queue *rxq,
 			     struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts, uint8_t *split_packet,
 			     bool offload)
 {
 #define IAVF_DESCS_PER_LOOP_AVX 8
 
-	/* const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; */
-	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *type_table = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	/* struct iavf_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail]; */
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union iavf_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union iavf_rx_desc *rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 
 	rte_prefetch0(rxdp);
@@ -487,14 +485,14 @@ flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct ci_rx_queue *rxq,
 				      struct rte_mbuf **rx_pkts,
 				      uint16_t nb_pkts, uint8_t *split_packet,
 				      bool offload)
 {
 #define IAVF_DESCS_PER_LOOP_AVX 8
 
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
@@ -503,9 +501,9 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
 	volatile union iavf_rx_flex_desc *rxdp =
-		(volatile union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+		(volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -1476,7 +1474,7 @@ static __rte_always_inline uint16_t
 iavf_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				   uint16_t nb_pkts, bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
@@ -1565,7 +1563,7 @@ iavf_recv_scattered_burst_vec_avx2_flex_rxd(void *rx_queue,
 					    struct rte_mbuf **rx_pkts,
 					    uint16_t nb_pkts, bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
index f2af028bef..80495f33cd 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
@@ -28,26 +28,26 @@
 #define IAVF_RX_TS_OFFLOAD
 
 static __rte_always_inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	iavf_rxq_rearm_common(rxq, true);
 }
 
 #define IAVF_RX_LEN_MASK 0x80808080
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx512(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx512(struct ci_rx_queue *rxq,
 			       struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts, uint8_t *split_packet,
 			       bool offload)
 {
 #ifdef IAVF_RX_PTYPE_OFFLOAD
-	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *type_table = rxq->iavf_vsi->adapter->ptype_tbl;
 #endif
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0, 0,
 						    rxq->mbuf_initializer);
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union iavf_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union iavf_rx_desc *rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -578,13 +578,13 @@ flex_rxd_to_fdir_flags_vec_avx512(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct ci_rx_queue *rxq,
 					struct rte_mbuf **rx_pkts,
 					uint16_t nb_pkts,
 					uint8_t *split_packet,
 					bool offload)
 {
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
 #endif
@@ -594,9 +594,9 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0, 0,
 						    rxq->mbuf_initializer);
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
 	volatile union iavf_rx_flex_desc *rxdp =
-		(volatile union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+		(volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -1653,7 +1653,7 @@ static __rte_always_inline uint16_t
 iavf_recv_scattered_burst_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts, bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
@@ -1729,7 +1729,7 @@ iavf_recv_scattered_burst_vec_avx512_flex_rxd(void *rx_queue,
 					      uint16_t nb_pkts,
 					      bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
index 38e9a206d9..f0a7d19b6a 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
@@ -8,7 +8,6 @@
 #include <ethdev_driver.h>
 #include <rte_malloc.h>
 
-#include "../common/rx.h"
 #include "iavf.h"
 #include "iavf_rxtx.h"
 
@@ -21,7 +20,7 @@ iavf_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
 }
 
 static inline void
-_iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
+_iavf_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	const unsigned int mask = rxq->nb_rx_desc - 1;
 	unsigned int i;
@@ -32,15 +31,15 @@ _iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
 	/* free all mbufs that are valid in the ring */
 	if (rxq->rxrearm_nb == 0) {
 		for (i = 0; i < rxq->nb_rx_desc; i++) {
-			if (rxq->sw_ring[i])
-				rte_pktmbuf_free_seg(rxq->sw_ring[i]);
+			if (rxq->sw_ring[i].mbuf)
+				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
 		}
 	} else {
 		for (i = rxq->rx_tail;
 		     i != rxq->rxrearm_start;
 		     i = (i + 1) & mask) {
-			if (rxq->sw_ring[i])
-				rte_pktmbuf_free_seg(rxq->sw_ring[i]);
+			if (rxq->sw_ring[i].mbuf)
+				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
 		}
 	}
 
@@ -51,7 +50,7 @@ _iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
 }
 
 static inline int
-iavf_rx_vec_queue_default(struct iavf_rx_queue *rxq)
+iavf_rx_vec_queue_default(struct ci_rx_queue *rxq)
 {
 	if (!rxq)
 		return -1;
@@ -117,7 +116,7 @@ static inline int
 iavf_rx_vec_dev_check_default(struct rte_eth_dev *dev)
 {
 	int i;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int ret;
 	int result = 0;
 
@@ -240,14 +239,14 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 
 #ifdef RTE_ARCH_X86
 static __rte_always_inline void
-iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
+iavf_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -259,7 +258,7 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 
 			dma_addr0 = _mm_setzero_si128();
 			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i] = &rxq->fake_mbuf;
+				rxp[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
@@ -278,8 +277,8 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
 		__m128i vaddr0, vaddr1;
 
-		mb0 = rxp[0];
-		mb1 = rxp[1];
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
 
 		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
 		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
index a583340f15..0fe4a63dc7 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
@@ -15,18 +15,18 @@
 #include "iavf_rxtx_vec_common.h"
 
 static inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	uint64x2_t dma_addr0, dma_addr1;
 	uint64x2_t zero = vdupq_n_u64(0);
 	uint64_t paddr;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (unlikely(rte_mempool_get_bulk(rxq->mp,
@@ -35,7 +35,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
 			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxep[i] = &rxq->fake_mbuf;
+				rxep[i].mbuf = &rxq->fake_mbuf;
 				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read), zero);
 			}
 		}
@@ -46,8 +46,8 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
 	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0];
-		mb1 = rxep[1];
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
 
 		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
 		dma_addr0 = vdupq_n_u64(paddr);
@@ -75,7 +75,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-desc_to_olflags_v(struct iavf_rx_queue *rxq, volatile union iavf_rx_desc *rxdp,
+desc_to_olflags_v(struct ci_rx_queue *rxq, volatile union iavf_rx_desc *rxdp,
 		  uint64x2_t descs[4], struct rte_mbuf **rx_pkts)
 {
 	RTE_SET_USED(rxdp);
@@ -193,17 +193,17 @@ desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **__rte_restrict rx_pkts,
  * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct iavf_rx_queue *__rte_restrict rxq,
+_recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		   struct rte_mbuf **__rte_restrict rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	RTE_SET_USED(split_packet);
 
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	/* mask to shuffle from desc. to mbuf */
 	uint8x16_t shuf_msk = {
@@ -226,7 +226,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *__rte_restrict rxq,
 	/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP */
 	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP);
 
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch_non_temporal(rxdp);
 
@@ -283,8 +283,8 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *__rte_restrict rxq,
 		descs[0] = vld1q_lane_u64(RTE_CAST_PTR(uint64_t *, rxdp), descs[0], 0);
 
 		/* B.1 load 4 mbuf point */
-		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
-		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
+		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos].mbuf);
+		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2].mbuf);
 
 		/* B.2 copy 4 mbuf point into rx_pkts  */
 		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
@@ -394,13 +394,13 @@ iavf_recv_pkts_vec(void *__rte_restrict rx_queue,
 }
 
 void __rte_cold
-iavf_rx_queue_release_mbufs_neon(struct iavf_rx_queue *rxq)
+iavf_rx_queue_release_mbufs_neon(struct ci_rx_queue *rxq)
 {
 	_iavf_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-iavf_rxq_vec_setup(struct iavf_rx_queue *rxq)
+iavf_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->rel_mbufs_type = IAVF_REL_MBUFS_NEON_VEC;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
index 2e41079e88..f18dfd636c 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
@@ -13,19 +13,19 @@
 #include <rte_vect.h>
 
 static inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
 	__m128i dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp, (void *)rxp,
@@ -33,7 +33,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 		if (rxq->rxrearm_nb + rxq->rx_free_thresh >= rxq->nb_rx_desc) {
 			dma_addr0 = _mm_setzero_si128();
 			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i] = &rxq->fake_mbuf;
+				rxp[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
@@ -47,8 +47,8 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 	for (i = 0; i < rxq->rx_free_thresh; i += 2, rxp += 2) {
 		__m128i vaddr0, vaddr1;
 
-		mb0 = rxp[0];
-		mb1 = rxp[1];
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
 
 		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
 		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
@@ -88,7 +88,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4],
 		  struct rte_mbuf **rx_pkts)
 {
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -206,11 +206,11 @@ flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 static inline void
-flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4], __m128i descs_bh[4],
+flex_desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4], __m128i descs_bh[4],
 		       struct rte_mbuf **rx_pkts)
 #else
 static inline void
-flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+flex_desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4],
 		       struct rte_mbuf **rx_pkts)
 #endif
 {
@@ -466,16 +466,16 @@ flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
 	__m128i shuf_msk;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	__m128i crc_adjust = _mm_set_epi16(
 				0, 0, 0,    /* ignore non-length fields */
@@ -500,7 +500,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -571,7 +571,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 #endif
 
 		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
-		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos].mbuf);
 		/* Read desc statuses backwards to avoid race condition */
 		/* A.1 load desc[3] */
 		descs[3] = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, rxdp + 3));
@@ -714,16 +714,16 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
+_recv_raw_pkts_vec_flex_rxd(struct ci_rx_queue *rxq,
 			    struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union iavf_rx_flex_desc *rxdp;
-	struct rte_mbuf **sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
 #endif
@@ -779,7 +779,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = (volatile union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+	rxdp = (volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -857,7 +857,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 #endif
 
 		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
-		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos].mbuf);
 		/* Read desc statuses backwards to avoid race condition */
 		/* A.1 load desc[3] */
 		descs[3] = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, rxdp + 3));
@@ -1207,7 +1207,7 @@ static uint16_t
 iavf_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 	unsigned int i = 0;
 
@@ -1276,7 +1276,7 @@ iavf_recv_scattered_burst_vec_flex_rxd(void *rx_queue,
 				       struct rte_mbuf **rx_pkts,
 				       uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 	unsigned int i = 0;
 
@@ -1449,7 +1449,7 @@ iavf_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-iavf_rx_queue_release_mbufs_sse(struct iavf_rx_queue *rxq)
+iavf_rx_queue_release_mbufs_sse(struct ci_rx_queue *rxq)
 {
 	_iavf_rx_queue_release_mbufs_vec(rxq);
 }
@@ -1462,7 +1462,7 @@ iavf_txq_vec_setup(struct ci_tx_queue *txq)
 }
 
 int __rte_cold
-iavf_rxq_vec_setup(struct iavf_rx_queue *rxq)
+iavf_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->rel_mbufs_type = IAVF_REL_MBUFS_SSE_VEC;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
diff --git a/drivers/net/intel/iavf/iavf_vchnl.c b/drivers/net/intel/iavf/iavf_vchnl.c
index 6feca8435e..9f8bb07726 100644
--- a/drivers/net/intel/iavf/iavf_vchnl.c
+++ b/drivers/net/intel/iavf/iavf_vchnl.c
@@ -1218,7 +1218,7 @@ int
 iavf_configure_queues(struct iavf_adapter *adapter,
 		uint16_t num_queue_pairs, uint16_t index)
 {
-	struct iavf_rx_queue **rxq = (struct iavf_rx_queue **)adapter->dev_data->rx_queues;
+	struct ci_rx_queue **rxq = (struct ci_rx_queue **)adapter->dev_data->rx_queues;
 	struct ci_tx_queue **txq = (struct ci_tx_queue **)adapter->dev_data->tx_queues;
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
 	struct virtchnl_vsi_queue_config_info *vc_config;
@@ -2258,9 +2258,9 @@ iavf_get_ptp_cap(struct iavf_adapter *adapter)
 }
 
 int
-iavf_get_phc_time(struct iavf_rx_queue *rxq)
+iavf_get_phc_time(struct ci_rx_queue *rxq)
 {
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
 	struct virtchnl_phc_time phc_time;
 	struct iavf_cmd_info args;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v3 07/13] net/intel: generalize vectorized Rx rearm
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (4 preceding siblings ...)
  2025-05-12 12:54   ` [PATCH v3 06/13] net/iavf: " Anatoly Burakov
@ 2025-05-12 12:54   ` Anatoly Burakov
  2025-05-15 10:56     ` Bruce Richardson
  2025-05-12 12:54   ` [PATCH v3 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
                     ` (7 subsequent siblings)
  13 siblings, 1 reply; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 12:54 UTC (permalink / raw)
  To: dev, Bruce Richardson

There is a certain amount of duplication between various drivers when it
comes to Rx ring rearm. This patch takes the implementation from the ice
driver as a base, because it supports the no-IOVA-in-mbuf case as well as
all vector implementations, and moves it to a common file.

The driver Rx rearm code used copious amounts of #ifdef-ery to
discriminate between 16- and 32-byte descriptor support, but we cannot do
that in the common code because we will not have access to those
definitions. So, instead, we use copious amounts of compile-time constant
propagation and force-inlining to ensure that the compiler generates
effectively the same code it generated back when it was in the driver. We
also add a compile-time definition for vectorization levels for x86
vector instructions to discriminate between different instruction sets.
This too is constant-propagated, and thus should not affect performance.
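
To make the constant propagation concrete, the following is a minimal
sketch (not part of this patch) of how a driver-side wrapper might call
the common routine. The wrapper names are hypothetical; ci_rxq_rearm()
and the CI_RX_VEC_LEVEL_* values come from the new common header. Since
both arguments are literal constants at every call site, the force-inlined
dispatch inside ci_rxq_rearm() collapses to a single code path per wrapper:

#include "../common/rx_vec_sse.h"

/* illustration only - hypothetical wrappers, not part of this patch */
static __rte_always_inline void
example_rxq_rearm_32b_sse(struct ci_rx_queue *rxq)
{
	/* 32-byte descriptors: only the SSE rearm path is used */
	ci_rxq_rearm(rxq, 32, CI_RX_VEC_LEVEL_SSE);
}

static __rte_always_inline void
example_rxq_rearm_16b_avx512(struct ci_rx_queue *rxq)
{
	/* 16-byte descriptors: AVX-512 rearm when built with AVX-512,
	 * otherwise falls back to AVX2 or SSE at compile time
	 */
	ci_rxq_rearm(rxq, 16, CI_RX_VEC_LEVEL_AVX512);
}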

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h               |   3 +
 drivers/net/intel/common/rx_vec_sse.h       | 323 ++++++++++++++++++++
 drivers/net/intel/ice/ice_rxtx.h            |   2 +-
 drivers/net/intel/ice/ice_rxtx_common_avx.h | 233 --------------
 drivers/net/intel/ice/ice_rxtx_vec_avx2.c   |   5 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx512.c |   5 +-
 drivers/net/intel/ice/ice_rxtx_vec_sse.c    |  77 +----
 7 files changed, 336 insertions(+), 312 deletions(-)
 create mode 100644 drivers/net/intel/common/rx_vec_sse.h
 delete mode 100644 drivers/net/intel/ice/ice_rxtx_common_avx.h

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index 2d9328ae89..65e920fdd1 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -14,6 +14,8 @@
 #define CI_RX_BURST 32
 #define CI_RX_MAX_BURST 32
 #define CI_RX_MAX_NSEG 2
+#define CI_VPMD_DESCS_PER_LOOP 4
+#define CI_VPMD_RX_REARM_THRESH 64
 
 struct ci_rx_queue;
 
@@ -40,6 +42,7 @@ struct ci_rx_queue {
 		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
 		volatile union iavf_16byte_rx_desc *iavf_rx_16b_ring;
 		volatile union iavf_32byte_rx_desc *iavf_rx_32b_ring;
+		volatile void *rx_ring; /**< Generic */
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
diff --git a/drivers/net/intel/common/rx_vec_sse.h b/drivers/net/intel/common/rx_vec_sse.h
new file mode 100644
index 0000000000..6fe0baf38b
--- /dev/null
+++ b/drivers/net/intel/common/rx_vec_sse.h
@@ -0,0 +1,323 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 Intel Corporation
+ */
+
+#ifndef _COMMON_INTEL_RX_VEC_SSE_H_
+#define _COMMON_INTEL_RX_VEC_SSE_H_
+
+#include <stdint.h>
+
+#include <ethdev_driver.h>
+#include <rte_io.h>
+
+#include "rx.h"
+
+enum ci_rx_vec_level {
+	CI_RX_VEC_LEVEL_SSE = 0,
+	CI_RX_VEC_LEVEL_AVX2,
+	CI_RX_VEC_LEVEL_AVX512,
+};
+
+static inline int
+_ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	volatile void *rxdp;
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void **)rxp,
+				 rearm_thresh) < 0) {
+		if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) {
+				rxp[i].mbuf = &rxq->fake_mbuf;
+				const void *ptr = RTE_PTR_ADD(rxdp, i * desc_len);
+				_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr),
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += rearm_thresh;
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * SSE code path can handle both 16-byte and 32-byte descriptors with one code
+ * path, as we only ever write 16 bytes at a time.
+ */
+static __rte_always_inline void
+_ci_rxq_rearm_sse(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	const __m128i hdr_room = _mm_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+	const __m128i zero = _mm_setzero_si128();
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile void *rxdp;
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp = RTE_PTR_ADD(rxdp, 2 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len);
+		__m128i vaddr0, vaddr1;
+		__m128i dma_addr0, dma_addr1;
+		struct rte_mbuf *mb0, *mb1;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* add headroom to address values */
+		vaddr0 = _mm_add_epi64(vaddr0, hdr_room);
+		vaddr1 = _mm_add_epi64(vaddr1, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+		/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, zero);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, zero);
+#else
+		/* erase Header Buffer Address */
+		dma_addr0 = _mm_unpacklo_epi64(vaddr0, zero);
+		dma_addr1 = _mm_unpacklo_epi64(vaddr1, zero);
+#endif
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr0), dma_addr0);
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr1), dma_addr1);
+	}
+}
+
+#ifdef __AVX2__
+/* AVX2 version for 16-byte descriptors, handles 4 buffers at a time */
+static __rte_always_inline void
+_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	const size_t desc_len = 16;
+	volatile void *rxdp;
+	const __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+	const __m256i zero = _mm256_setzero_si256();
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 4, rxp += 4, rxdp = RTE_PTR_ADD(rxdp, 4 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 2);
+		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+		__m256i vaddr0_1, vaddr2_3;
+		__m256i dma_addr0_1, dma_addr2_3;
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+		mb2 = rxp[2].mbuf;
+		mb3 = rxp[3].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+		/**
+		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+		 * into the high lanes. Similarly for 2 & 3
+		 */
+		vaddr0_1 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+						vaddr1, 1);
+		vaddr2_3 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+						vaddr3, 1);
+
+		/* add headroom to address values */
+		vaddr0_1 = _mm256_add_epi64(vaddr0_1, hdr_room);
+		vaddr2_3 = _mm256_add_epi64(vaddr2_3, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, zero);
+		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, zero);
+#else
+		/* erase Header Buffer Address */
+		dma_addr0_1 = _mm256_unpacklo_epi64(vaddr0_1, zero);
+		dma_addr2_3 = _mm256_unpacklo_epi64(vaddr2_3, zero);
+#endif
+
+		/* flush desc with pa dma_addr */
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr0), dma_addr0_1);
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr1), dma_addr2_3);
+	}
+}
+#endif /* __AVX2__ */
+
+#ifdef __AVX512VL__
+/* AVX512 version for 16-byte descriptors, handles 8 buffers at a time */
+static __rte_always_inline void
+_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	const size_t desc_len = 16;
+	volatile void *rxdp;
+	int i;
+	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+	struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+	__m512i dma_addr0_3, dma_addr4_7;
+	__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+	__m512i zero = _mm512_setzero_si512();
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 8, rxp += 8, rxdp = RTE_PTR_ADD(rxdp, 8 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 4);
+		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+		__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+		__m256i vaddr0_1, vaddr2_3;
+		__m256i vaddr4_5, vaddr6_7;
+		__m512i vaddr0_3, vaddr4_7;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+		mb2 = rxp[2].mbuf;
+		mb3 = rxp[3].mbuf;
+		mb4 = rxp[4].mbuf;
+		mb5 = rxp[5].mbuf;
+		mb6 = rxp[6].mbuf;
+		mb7 = rxp[7].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+		vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+		vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+		vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+		vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+		/**
+		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+		 * into the high lanes. Similarly for 2 & 3, and so on.
+		 */
+		vaddr0_1 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+						vaddr1, 1);
+		vaddr2_3 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+						vaddr3, 1);
+		vaddr4_5 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+						vaddr5, 1);
+		vaddr6_7 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+						vaddr7, 1);
+		vaddr0_3 =
+			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+						vaddr2_3, 1);
+		vaddr4_7 =
+			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+						vaddr6_7, 1);
+
+		/* add headroom to address values */
+		vaddr0_3 = _mm512_add_epi64(vaddr0_3, hdr_room);
+		vaddr4_7 = _mm512_add_epi64(vaddr4_7, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+		dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, zero);
+		dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, zero);
+#else
+		/* erase Header Buffer Address */
+		dma_addr0_3 = _mm512_unpacklo_epi64(vaddr0_3, zero);
+		dma_addr4_7 = _mm512_unpacklo_epi64(vaddr4_7, zero);
+#endif
+
+		/* flush desc with pa dma_addr */
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr0), dma_addr0_3);
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr1), dma_addr4_7);
+	}
+}
+#endif /* __AVX512VL__ */
+
+static __rte_always_inline void
+ci_rxq_rearm(struct ci_rx_queue *rxq, const size_t desc_len,
+		const enum ci_rx_vec_level vec_level)
+{
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	uint16_t rx_id;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (_ci_rxq_rearm_get_bufs(rxq, desc_len) < 0)
+		return;
+
+	if (desc_len == 16) {
+		switch (vec_level) {
+		case CI_RX_VEC_LEVEL_AVX512:
+#ifdef __AVX512VL__
+			_ci_rxq_rearm_avx512(rxq);
+			break;
+#else
+			/* fall back to AVX2 if AVX512 isn't supported */
+			/* fall through */
+#endif
+		case CI_RX_VEC_LEVEL_AVX2:
+#ifdef __AVX2__
+			_ci_rxq_rearm_avx2(rxq);
+			break;
+#else
+			/* fall back to SSE if AVX2 isn't supported */
+			/* fall through */
+#endif
+		case CI_RX_VEC_LEVEL_SSE:
+			_ci_rxq_rearm_sse(rxq, desc_len);
+			break;
+		}
+	} else {
+		/* for 32-byte descriptors only support SSE */
+		_ci_rxq_rearm_sse(rxq, desc_len);
+	}
+
+	rxq->rxrearm_start += rearm_thresh;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= rearm_thresh;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif /* _COMMON_INTEL_RX_VEC_SSE_H_ */
diff --git a/drivers/net/intel/ice/ice_rxtx.h b/drivers/net/intel/ice/ice_rxtx.h
index 1a39770d7d..72d0972587 100644
--- a/drivers/net/intel/ice/ice_rxtx.h
+++ b/drivers/net/intel/ice/ice_rxtx.h
@@ -43,7 +43,7 @@
 
 #define ICE_VPMD_RX_BURST           32
 #define ICE_VPMD_TX_BURST           32
-#define ICE_RXQ_REARM_THRESH        64
+#define ICE_RXQ_REARM_THRESH        CI_VPMD_RX_REARM_THRESH
 #define ICE_MAX_RX_BURST            ICE_RXQ_REARM_THRESH
 #define ICE_TX_MAX_FREE_BUF_SZ      64
 #define ICE_DESCS_PER_LOOP          4
diff --git a/drivers/net/intel/ice/ice_rxtx_common_avx.h b/drivers/net/intel/ice/ice_rxtx_common_avx.h
deleted file mode 100644
index 7209c902db..0000000000
--- a/drivers/net/intel/ice/ice_rxtx_common_avx.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2019 Intel Corporation
- */
-
-#ifndef _ICE_RXTX_COMMON_AVX_H_
-#define _ICE_RXTX_COMMON_AVX_H_
-
-#include "ice_rxtx.h"
-
-#ifdef __AVX2__
-static __rte_always_inline void
-ice_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 ICE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-#if RTE_IOVA_IN_MBUF
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-#else
-		/* convert va to dma_addr hdr/data */
-		dma_addr0 = _mm_unpacklo_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpacklo_epi64(vaddr1, vaddr1);
-#endif
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-#else
-#ifdef __AVX512VL__
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < ICE_RXQ_REARM_THRESH;
-				i += 8, rxep += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-			mb4 = rxep[4].mbuf;
-			mb5 = rxep[5].mbuf;
-			mb6 = rxep[6].mbuf;
-			mb7 = rxep[7].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-						   vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-						   vaddr6_7, 1);
-
-#if RTE_IOVA_IN_MBUF
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-#else
-			/* convert va to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpacklo_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpacklo_epi64(vaddr4_7, vaddr4_7);
-#endif
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *, &rxdp->read), dma_addr0_3);
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *, &(rxdp + 4)->read), dma_addr4_7);
-		}
-	} else
-#endif /* __AVX512VL__ */
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < ICE_RXQ_REARM_THRESH;
-				i += 4, rxep += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-
-#if RTE_IOVA_IN_MBUF
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-#else
-			/* convert va to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpacklo_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpacklo_epi64(vaddr2_3, vaddr2_3);
-#endif
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *, &rxdp->read), dma_addr0_1);
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *, &(rxdp + 2)->read), dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif /* __AVX2__ */
-
-#endif /* _ICE_RXTX_COMMON_AVX_H_ */
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
index f4555369a2..5ca3f92482 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
@@ -3,14 +3,15 @@
  */
 
 #include "ice_rxtx_vec_common.h"
-#include "ice_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
 static __rte_always_inline void
 ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	ice_rxq_rearm_common(rxq, false);
+	ci_rxq_rearm(rxq, sizeof(union ice_rx_flex_desc), CI_RX_VEC_LEVEL_AVX2);
 }
 
 static __rte_always_inline __m256i
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
index 6eea74d703..883ea97c07 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
@@ -3,7 +3,8 @@
  */
 
 #include "ice_rxtx_vec_common.h"
-#include "ice_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
@@ -12,7 +13,7 @@
 static __rte_always_inline void
 ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	ice_rxq_rearm_common(rxq, true);
+	ci_rxq_rearm(rxq, sizeof(union ice_rx_flex_desc), CI_RX_VEC_LEVEL_AVX512);
 }
 
 static inline __m256i
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_sse.c b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
index dc9d37226a..fa0c7e8829 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
@@ -4,6 +4,8 @@
 
 #include "ice_rxtx_vec_common.h"
 
+#include "../common/rx_vec_sse.h"
+
 #include <rte_vect.h>
 
 static inline __m128i
@@ -28,80 +30,7 @@ ice_flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 static inline void
 ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-					  RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 ICE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				 offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-#if RTE_IOVA_IN_MBUF
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-#else
-		/* convert va to dma_addr hdr/data */
-		dma_addr0 = _mm_unpacklo_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpacklo_epi64(vaddr1, vaddr1);
-#endif
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union ice_rx_flex_desc), CI_RX_VEC_LEVEL_SSE);
 }
 
 static inline void
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v3 08/13] net/i40e: use common Rx rearm code
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (5 preceding siblings ...)
  2025-05-12 12:54   ` [PATCH v3 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
@ 2025-05-12 12:54   ` Anatoly Burakov
  2025-05-15 10:58     ` Bruce Richardson
  2025-05-12 12:54   ` [PATCH v3 09/13] net/iavf: " Anatoly Burakov
                     ` (6 subsequent siblings)
  13 siblings, 1 reply; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 12:54 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes

The i40e driver has an implementation of vectorized mbuf rearm code that
is identical to the one in the common code, so just use that.

In addition, the i40e driver has an Rx queue rearm implementation for the
Neon instruction set, so create a common header for Neon implementations
too, and use it in the i40e Neon code.
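
As a rough sketch of the end result (all identifiers below are taken from the
diffs in this patch, not invented), each per-driver rearm function reduces to
including the per-architecture common header and making a single call with
the driver's descriptor size:

	/* minimal sketch -- the actual change is in the diff below */
	#include "../common/rx_vec_neon.h"	/* rx_vec_sse.h on x86 builds */

	static inline void
	i40e_rxq_rearm(struct ci_rx_queue *rxq)
	{
		/* the common helper only needs the HW descriptor size; buffer
		 * allocation, descriptor rearm and the tail update all happen
		 * inside it
		 */
		ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc));
	}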

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v2:
    - Fix compile issues on Arm64

 drivers/net/intel/common/rx_vec_neon.h        | 131 +++++++++++
 drivers/net/intel/i40e/i40e_rxtx.h            |   2 +-
 drivers/net/intel/i40e/i40e_rxtx_common_avx.h | 215 ------------------
 drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |   5 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |   5 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   |  59 +----
 drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    |  70 +-----
 7 files changed, 144 insertions(+), 343 deletions(-)
 create mode 100644 drivers/net/intel/common/rx_vec_neon.h
 delete mode 100644 drivers/net/intel/i40e/i40e_rxtx_common_avx.h

diff --git a/drivers/net/intel/common/rx_vec_neon.h b/drivers/net/intel/common/rx_vec_neon.h
new file mode 100644
index 0000000000..d79802b1c0
--- /dev/null
+++ b/drivers/net/intel/common/rx_vec_neon.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 Intel Corporation
+ */
+
+#ifndef _COMMON_INTEL_RX_VEC_NEON_H_
+#define _COMMON_INTEL_RX_VEC_NEON_H_
+
+#include <stdint.h>
+
+#include <ethdev_driver.h>
+#include <rte_io.h>
+#include <rte_vect.h>
+
+#include "rx.h"
+
+static inline int
+_ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	volatile void *rxdp;
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void **)rxp,
+				 rearm_thresh) < 0) {
+		if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) {
+			uint64x2_t zero = vdupq_n_u64(0);
+
+			for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) {
+				rxp[i].mbuf = &rxq->fake_mbuf;
+				const void *ptr = RTE_PTR_ADD(rxdp, i * desc_len);
+				vst1q_u64(RTE_CAST_PTR(uint64_t *, ptr), zero);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += rearm_thresh;
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * The NEON code path can handle both 16-byte and 32-byte descriptors with a
+ * single implementation, as we only ever write 16 bytes at a time.
+ */
+static __rte_always_inline void
+_ci_rxq_rearm_neon(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	const uint64x2_t zero = vdupq_n_u64(0);
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile void *rxdp;
+	int i;
+
+	const uint8x8_t mbuf_init = vld1_u8((uint8_t *)&rxq->mbuf_initializer);
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp = RTE_PTR_ADD(rxdp, 2 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len);
+		uint64_t addr0, addr1;
+		uint64x2_t dma_addr0, dma_addr1;
+		struct rte_mbuf *mb0, *mb1;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/*
+		 * Flush mbuf with pkt template.
+		 * Data to be rearmed is 6 bytes long.
+		 */
+		vst1_u8((uint8_t *)&mb0->rearm_data, mbuf_init);
+		addr0 = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
+		dma_addr0 = vsetq_lane_u64(addr0, zero, 0);
+		/* flush desc with pa dma_addr */
+		vst1q_u64(RTE_CAST_PTR(uint64_t *, ptr0), dma_addr0);
+
+		vst1_u8((uint8_t *)&mb1->rearm_data, mbuf_init);
+		addr1 = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
+		dma_addr1 = vsetq_lane_u64(addr1, zero, 0);
+		vst1q_u64(RTE_CAST_PTR(uint64_t *, ptr1), dma_addr1);
+#else
+		/*
+		 * Flush mbuf with pkt template.
+		 * Data to be rearmed is 6 bytes long.
+		 */
+		vst1_u8((uint8_t *)&mb0->rearm_data, mbuf_init);
+		addr0 = (uintptr_t)RTE_PTR_ADD(mb0->buf_addr, RTE_PKTMBUF_HEADROOM);
+		dma_addr0 = vsetq_lane_u64(addr0, zero, 0);
+		/* flush desc with pa dma_addr */
+		vst1q_u64(RTE_CAST_PTR(uint64_t *, ptr0), dma_addr0);
+
+		vst1_u8((uint8_t *)&mb1->rearm_data, mbuf_init);
+		addr1 = (uintptr_t)RTE_PTR_ADD(mb1->buf_addr, RTE_PKTMBUF_HEADROOM);
+		dma_addr1 = vsetq_lane_u64(addr1, zero, 0);
+		vst1q_u64(RTE_CAST_PTR(uint64_t *, ptr1), dma_addr1);
+#endif
+	}
+}
+
+static __rte_always_inline void
+ci_rxq_rearm(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	uint16_t rx_id;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (_ci_rxq_rearm_get_bufs(rxq, desc_len) < 0)
+		return;
+
+	_ci_rxq_rearm_neon(rxq, desc_len);
+
+	rxq->rxrearm_start += rearm_thresh;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= rearm_thresh;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif /* _COMMON_INTEL_RX_VEC_NEON_H_ */
diff --git a/drivers/net/intel/i40e/i40e_rxtx.h b/drivers/net/intel/i40e/i40e_rxtx.h
index 4b5a84d8ef..8a41db2df3 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx.h
@@ -13,7 +13,7 @@
 
 #define RTE_I40E_VPMD_RX_BURST        32
 #define RTE_I40E_VPMD_TX_BURST        32
-#define RTE_I40E_RXQ_REARM_THRESH      32
+#define RTE_I40E_RXQ_REARM_THRESH      CI_VPMD_RX_REARM_THRESH
 #define RTE_I40E_MAX_RX_BURST          RTE_I40E_RXQ_REARM_THRESH
 #define RTE_I40E_TX_MAX_FREE_BUF_SZ    64
 #define RTE_I40E_DESCS_PER_LOOP    4
diff --git a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
deleted file mode 100644
index fd9447014b..0000000000
--- a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
+++ /dev/null
@@ -1,215 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2015 Intel Corporation
- */
-
-#ifndef _I40E_RXTX_COMMON_AVX_H_
-#define _I40E_RXTX_COMMON_AVX_H_
-#include <stdint.h>
-#include <ethdev_driver.h>
-#include <rte_malloc.h>
-
-#include "i40e_ethdev.h"
-#include "i40e_rxtx.h"
-
-#ifdef __AVX2__
-static __rte_always_inline void
-i40e_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-#else
-#ifdef __AVX512VL__
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
-				i += 8, rxep += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-			mb4 = rxep[4].mbuf;
-			mb5 = rxep[5].mbuf;
-			mb6 = rxep[6].mbuf;
-			mb7 = rxep[7].mbuf;
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-						   vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-						   vaddr6_7, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *,
-					&rxdp->read), dma_addr0_3);
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *,
-					&(rxdp + 4)->read), dma_addr4_7);
-		}
-	} else
-#endif /* __AVX512VL__*/
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
-				i += 4, rxep += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 = _mm256_inserti128_si256
-				(_mm256_castsi128_si256(vaddr0), vaddr1, 1);
-			vaddr2_3 = _mm256_inserti128_si256
-				(_mm256_castsi128_si256(vaddr2), vaddr3, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *,
-					&rxdp->read), dma_addr0_1);
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *,
-					&(rxdp + 2)->read), dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif /* __AVX2__*/
-
-#endif /*_I40E_RXTX_COMMON_AVX_H_*/
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index 0f3f7430aa..260b7d700a 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -11,14 +11,15 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
-#include "i40e_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
 static __rte_always_inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	i40e_rxq_rearm_common(rxq, false);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc), CI_RX_VEC_LEVEL_AVX2);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index f2292b45e8..be004e9f4f 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -11,7 +11,8 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
-#include "i40e_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
@@ -20,7 +21,7 @@
 static __rte_always_inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	i40e_rxq_rearm_common(rxq, true);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc), CI_RX_VEC_LEVEL_AVX512);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index 814aa666dc..6c21546471 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -16,65 +16,12 @@
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
 
+#include "../common/rx_vec_neon.h"
+
 static inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	uint64x2_t dma_addr0, dma_addr1;
-	uint64x2_t zero = vdupq_n_u64(0);
-	uint64_t paddr;
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  RTE_I40E_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read), zero);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr0 = vdupq_n_u64(paddr);
-
-		/* flush desc with pa dma_addr */
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr0);
-
-		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr1 = vdupq_n_u64(paddr);
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
-	rte_io_wmb();
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc));
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 74cd59e245..432177d499 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -12,78 +12,14 @@
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
 
+#include "../common/rx_vec_sse.h"
+
 #include <rte_vect.h>
 
 static inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc), CI_RX_VEC_LEVEL_SSE);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v3 09/13] net/iavf: use common Rx rearm code
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (6 preceding siblings ...)
  2025-05-12 12:54   ` [PATCH v3 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
@ 2025-05-12 12:54   ` Anatoly Burakov
  2025-05-12 12:54   ` [PATCH v3 10/13] net/ixgbe: " Anatoly Burakov
                     ` (5 subsequent siblings)
  13 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 12:54 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin, Ian Stokes; +Cc: bruce.richardson

The iavf driver has implementations of vectorized mbuf rearm code that
are identical to the ones in the common code, so just use those.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/iavf/iavf_rxtx.h            |   4 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   |   3 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c |   3 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_common.h | 199 ------------------
 drivers/net/intel/iavf/iavf_rxtx_vec_neon.c   |  58 +----
 drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    |  72 +------
 6 files changed, 11 insertions(+), 328 deletions(-)

diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index c43ddc3c2f..d70250bf85 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -25,8 +25,8 @@
 /* used for Vector PMD */
 #define IAVF_VPMD_RX_MAX_BURST    32
 #define IAVF_VPMD_TX_MAX_BURST    32
-#define IAVF_RXQ_REARM_THRESH     32
-#define IAVF_VPMD_DESCS_PER_LOOP  4
+#define IAVF_RXQ_REARM_THRESH     CI_VPMD_RX_REARM_THRESH
+#define IAVF_VPMD_DESCS_PER_LOOP  CI_VPMD_DESCS_PER_LOOP
 #define IAVF_VPMD_TX_MAX_FREE_BUF 64
 
 #define IAVF_TX_NO_VECTOR_FLAGS (				 \
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
index f51fa4acf9..496c7abc42 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
@@ -2,6 +2,7 @@
  * Copyright(c) 2019 Intel Corporation
  */
 
+#include "../common/rx_vec_sse.h"
 #include "iavf_rxtx_vec_common.h"
 
 #include <rte_vect.h>
@@ -9,7 +10,7 @@
 static __rte_always_inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	iavf_rxq_rearm_common(rxq, false);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc), CI_RX_VEC_LEVEL_AVX2);
 }
 
 #define PKTLEN_SHIFT     10
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
index 80495f33cd..e7cd2b7c89 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
@@ -2,6 +2,7 @@
  * Copyright(c) 2020 Intel Corporation
  */
 
+#include "../common/rx_vec_sse.h"
 #include "iavf_rxtx_vec_common.h"
 
 #include <rte_vect.h>
@@ -30,7 +31,7 @@
 static __rte_always_inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	iavf_rxq_rearm_common(rxq, true);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc), CI_RX_VEC_LEVEL_AVX512);
 }
 
 #define IAVF_RX_LEN_MASK 0x80808080
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
index f0a7d19b6a..50228eb112 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
@@ -237,203 +237,4 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 	*txd_hi |= ((uint64_t)td_cmd) << IAVF_TXD_QW1_CMD_SHIFT;
 }
 
-#ifdef RTE_ARCH_X86
-static __rte_always_inline void
-iavf_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxp,
-				 IAVF_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IAVF_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-#else
-#ifdef CC_AVX512_SUPPORT
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
-				i += 8, rxp += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxp[0];
-			mb1 = rxp[1];
-			mb2 = rxp[2];
-			mb3 = rxp[3];
-			mb4 = rxp[4];
-			mb5 = rxp[5];
-			mb6 = rxp[6];
-			mb7 = rxp[7];
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-							vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-							vaddr6_7, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
-			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
-		}
-	} else
-#endif
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
-				i += 4, rxp += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxp[0];
-			mb1 = rxp[1];
-			mb2 = rxp[2];
-			mb3 = rxp[3];
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif
-
 #endif
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
index 0fe4a63dc7..fe7c3572fb 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
@@ -14,64 +14,12 @@
 #include "iavf_rxtx.h"
 #include "iavf_rxtx_vec_common.h"
 
+#include "../common/rx_vec_neon.h"
+
 static inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	uint64x2_t dma_addr0, dma_addr1;
-	uint64x2_t zero = vdupq_n_u64(0);
-	uint64_t paddr;
-
-	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  IAVF_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read), zero);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IAVF_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr0 = vdupq_n_u64(paddr);
-
-		/* flush desc with pa dma_addr */
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr0);
-
-		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr1 = vdupq_n_u64(paddr);
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	rte_io_wmb();
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc));
 }
 
 static inline void
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
index f18dfd636c..3f0ca6cf8e 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
@@ -9,82 +9,14 @@
 #include "iavf.h"
 #include "iavf_rxtx.h"
 #include "iavf_rxtx_vec_common.h"
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
 static inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-
-	volatile union iavf_rx_desc *rxdp;
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp, (void *)rxp,
-				 rxq->rx_free_thresh) < 0) {
-		if (rxq->rxrearm_nb + rxq->rx_free_thresh >= rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			rxq->rx_free_thresh;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < rxq->rx_free_thresh; i += 2, rxp += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += rxq->rx_free_thresh;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= rxq->rx_free_thresh;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-		   "rearm_start=%u rearm_nb=%u",
-		   rxq->port_id, rxq->queue_id,
-		   rx_id, rxq->rxrearm_start, rxq->rxrearm_nb);
-
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc), CI_RX_VEC_LEVEL_SSE);
 }
 
 static inline void
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v3 10/13] net/ixgbe: use common Rx rearm code
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (7 preceding siblings ...)
  2025-05-12 12:54   ` [PATCH v3 09/13] net/iavf: " Anatoly Burakov
@ 2025-05-12 12:54   ` Anatoly Burakov
  2025-05-12 12:54   ` [PATCH v3 11/13] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
                     ` (4 subsequent siblings)
  13 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 12:54 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin; +Cc: bruce.richardson

The ixgbe driver has implementations of vectorized mbuf rearm code that
are identical to the ones in the common code, so just use those.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/ixgbe/ixgbe_rxtx.h          |  2 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c | 66 +---------------
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c  | 75 +------------------
 3 files changed, 7 insertions(+), 136 deletions(-)

diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index 84e28eb254..f3dd32b9ff 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -37,7 +37,7 @@
 #define RTE_IXGBE_DESCS_PER_LOOP    4
 
 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
-#define RTE_IXGBE_RXQ_REARM_THRESH      32
+#define RTE_IXGBE_RXQ_REARM_THRESH      CI_VPMD_RX_REARM_THRESH
 #define RTE_IXGBE_MAX_RX_BURST          RTE_IXGBE_RXQ_REARM_THRESH
 #endif
 
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
index 630a2e6a1d..0842f213ef 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -11,72 +11,12 @@
 #include "ixgbe_rxtx.h"
 #include "ixgbe_rxtx_vec_common.h"
 
+#include "../common/rx_vec_neon.h"
+
 static inline void
 ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	uint64x2_t dma_addr0, dma_addr1;
-	uint64x2_t zero = vdupq_n_u64(0);
-	uint64_t paddr;
-	uint8x8_t p;
-
-	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read),
-					  zero);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_IXGBE_RXQ_REARM_THRESH;
-		return;
-	}
-
-	p = vld1_u8((uint8_t *)&rxq->mbuf_initializer);
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/*
-		 * Flush mbuf with pkt template.
-		 * Data to be rearmed is 6 bytes long.
-		 */
-		vst1_u8((uint8_t *)&mb0->rearm_data, p);
-		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr0 = vsetq_lane_u64(paddr, zero, 0);
-		/* flush desc with pa dma_addr */
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr0);
-
-		vst1_u8((uint8_t *)&mb1->rearm_data, p);
-		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr1 = vsetq_lane_u64(paddr, zero, 0);
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union ixgbe_adv_rx_desc));
 }
 
 static inline void
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
index ecfb0d6ba6..c6e90b8d41 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -10,83 +10,14 @@
 #include "ixgbe_rxtx.h"
 #include "ixgbe_rxtx_vec_common.h"
 
+#include "../common/rx_vec_sse.h"
+
 #include <rte_vect.h>
 
 static inline void
 ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	const __m128i hba_msk = _mm_set_epi64x(0, UINT64_MAX);
-
-	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 RTE_IXGBE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_IXGBE_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr));
-		vaddr1 = _mm_loadu_si128((__m128i *)&(mb1->buf_addr));
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* set Header Buffer Address to zero */
-		dma_addr0 =  _mm_and_si128(dma_addr0, hba_msk);
-		dma_addr1 =  _mm_and_si128(dma_addr1, hba_msk);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t) ((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union ixgbe_adv_rx_desc), CI_RX_VEC_LEVEL_SSE);
 }
 
 #ifdef RTE_LIB_SECURITY
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v3 11/13] net/intel: support wider x86 vectors for Rx rearm
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (8 preceding siblings ...)
  2025-05-12 12:54   ` [PATCH v3 10/13] net/ixgbe: " Anatoly Burakov
@ 2025-05-12 12:54   ` Anatoly Burakov
  2025-05-12 12:54   ` [PATCH v3 12/13] net/intel: add common Rx mbuf recycle Anatoly Burakov
                     ` (3 subsequent siblings)
  13 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 12:54 UTC (permalink / raw)
  To: dev, Bruce Richardson

Currently, for the 32-byte descriptor format, only the SSE instruction set is
supported. Add implementations for the AVX2 and AVX512 instruction sets. These
implementations similarly constant-propagate everything at compile time and
thus should not affect performance of the existing code paths. To improve code
readability and reduce code duplication due to supporting different descriptor
sizes, the implementation is also refactored.
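
For illustration only (the helper, enum and call below are the ones from the
common rx_vec_sse.h and ice code earlier in this series, not new additions),
the constant-propagation argument relies on every call site passing
compile-time constants into an always-inline helper, so the descriptor-size
and vector-level dispatch should fold away and each driver build keeps a
single code path:

	/* sketch of a call site: desc_len and vec_level are constants here */
	static __rte_always_inline void
	ice_rxq_rearm(struct ci_rx_queue *rxq)
	{
		/* sizeof() and the enum value are compile-time constants, so
		 * the "desc_len == 16" and vec_level checks inside
		 * ci_rxq_rearm should be resolved at compile time rather than
		 * at run time
		 */
		ci_rxq_rearm(rxq, sizeof(union ice_rx_flex_desc), CI_RX_VEC_LEVEL_AVX512);
	}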

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx_vec_sse.h | 380 ++++++++++++++------------
 1 file changed, 205 insertions(+), 175 deletions(-)

diff --git a/drivers/net/intel/common/rx_vec_sse.h b/drivers/net/intel/common/rx_vec_sse.h
index 6fe0baf38b..0aeaac3dc9 100644
--- a/drivers/net/intel/common/rx_vec_sse.h
+++ b/drivers/net/intel/common/rx_vec_sse.h
@@ -48,223 +48,258 @@ _ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq, const size_t desc_len)
 	return 0;
 }
 
-/*
- * SSE code path can handle both 16-byte and 32-byte descriptors with one code
- * path, as we only ever write 16 bytes at a time.
- */
-static __rte_always_inline void
-_ci_rxq_rearm_sse(struct ci_rx_queue *rxq, const size_t desc_len)
+static __rte_always_inline __m128i
+_ci_rxq_rearm_desc_sse(const __m128i vaddr)
 {
 	const __m128i hdr_room = _mm_set1_epi64x(RTE_PKTMBUF_HEADROOM);
 	const __m128i zero = _mm_setzero_si128();
+	__m128i reg;
+
+	/* add headroom to address values */
+	reg = _mm_add_epi64(vaddr, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+	/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+			 offsetof(struct rte_mbuf, buf_addr) + 8);
+	/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
+	reg = _mm_unpackhi_epi64(reg, zero);
+#else
+	/* erase Header Buffer Address */
+	reg = _mm_unpacklo_epi64(reg, zero);
+#endif
+	return reg;
+}
+
+static __rte_always_inline void
+_ci_rxq_rearm_sse(struct ci_rx_queue *rxq, const size_t desc_len)
+{
 	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
 	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint8_t desc_per_reg = 1;
+	const uint8_t desc_per_iter = desc_per_reg * 2;
 	volatile void *rxdp;
 	int i;
 
 	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp = RTE_PTR_ADD(rxdp, 2 * desc_len)) {
+	for (i = 0; i < rearm_thresh;
+			i += desc_per_iter,
+			rxp += desc_per_iter,
+			rxdp = RTE_PTR_ADD(rxdp, desc_per_iter * desc_len)) {
 		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
-		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len);
-		__m128i vaddr0, vaddr1;
-		__m128i dma_addr0, dma_addr1;
-		struct rte_mbuf *mb0, *mb1;
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * desc_per_reg);
+		const struct rte_mbuf *mb0 = rxp[0].mbuf;
+		const struct rte_mbuf *mb1 = rxp[1].mbuf;
 
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
+		const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+		const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
 
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* add headroom to address values */
-		vaddr0 = _mm_add_epi64(vaddr0, hdr_room);
-		vaddr1 = _mm_add_epi64(vaddr1, hdr_room);
-
-#if RTE_IOVA_IN_MBUF
-		/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, zero);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, zero);
-#else
-		/* erase Header Buffer Address */
-		dma_addr0 = _mm_unpacklo_epi64(vaddr0, zero);
-		dma_addr1 = _mm_unpacklo_epi64(vaddr1, zero);
-#endif
+		const __m128i reg0 = _ci_rxq_rearm_desc_sse(vaddr0);
+		const __m128i reg1 = _ci_rxq_rearm_desc_sse(vaddr1);
 
 		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr0), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr1), dma_addr1);
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr0), reg0);
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr1), reg1);
 	}
 }
 
 #ifdef __AVX2__
-/* AVX2 version for 16-byte descriptors, handles 4 buffers at a time */
-static __rte_always_inline void
-_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq)
+static __rte_always_inline __m256i
+_ci_rxq_rearm_desc_avx2(const __m128i vaddr0, const __m128i vaddr1)
 {
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
-	const size_t desc_len = 16;
-	volatile void *rxdp;
 	const __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
 	const __m256i zero = _mm256_setzero_si256();
+	__m256i reg;
+
+	/* merge by casting 0 to 256-bit and inserting 1 into the high lanes */
+	reg =
+		_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+					vaddr1, 1);
+
+	/* add headroom to address values */
+	reg = _mm256_add_epi64(reg, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+	/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+			offsetof(struct rte_mbuf, buf_addr) + 8);
+	/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+	reg = _mm256_unpackhi_epi64(reg, zero);
+#else
+	/* erase Header Buffer Address */
+	reg = _mm256_unpacklo_epi64(reg, zero);
+#endif
+	return reg;
+}
+
+static __rte_always_inline void
+_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	/* how many descriptors can fit into a register */
+	const uint8_t desc_per_reg = sizeof(__m256i) / desc_len;
+	/* how many descriptors can fit into one loop iteration */
+	const uint8_t desc_per_iter = desc_per_reg * 2;
+	volatile void *rxdp;
 	int i;
 
 	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
 
-	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < rearm_thresh; i += 4, rxp += 4, rxdp = RTE_PTR_ADD(rxdp, 4 * desc_len)) {
+	/* Initialize the mbufs in vector, process 2 or 4 mbufs in one loop */
+	for (i = 0; i < rearm_thresh;
+			i += desc_per_iter,
+			rxp += desc_per_iter,
+			rxdp = RTE_PTR_ADD(rxdp, desc_per_iter * desc_len)) {
 		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
-		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 2);
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m256i vaddr0_1, vaddr2_3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * desc_per_reg);
+		__m256i reg0, reg1;
 
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-		mb2 = rxp[2].mbuf;
-		mb3 = rxp[3].mbuf;
+		if (desc_per_iter == 2) {
+			/* 16 byte descriptor, 16 byte zero, times two */
+			const __m128i zero = _mm_setzero_si128();
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
 
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
 
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3
-		 */
-		vaddr0_1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-						vaddr1, 1);
-		vaddr2_3 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-						vaddr3, 1);
+			reg0 = _ci_rxq_rearm_desc_avx2(vaddr0, zero);
+			reg1 = _ci_rxq_rearm_desc_avx2(vaddr1, zero);
+		} else {
+			/* 16 byte descriptor times four */
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
+			const struct rte_mbuf *mb2 = rxp[2].mbuf;
+			const struct rte_mbuf *mb3 = rxp[3].mbuf;
 
-		/* add headroom to address values */
-		vaddr0_1 = _mm256_add_epi64(vaddr0_1, hdr_room);
-		vaddr0_1 = _mm256_add_epi64(vaddr0_1, hdr_room);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+			const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+			const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
 
-#if RTE_IOVA_IN_MBUF
-		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
-		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, zero);
-		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, zero);
-#else
-		/* erase Header Buffer Address */
-		dma_addr0_1 = _mm256_unpacklo_epi64(vaddr0_1, zero);
-		dma_addr2_3 = _mm256_unpacklo_epi64(vaddr2_3, zero);
-#endif
+			reg0 = _ci_rxq_rearm_desc_avx2(vaddr0, vaddr1);
+			reg1 = _ci_rxq_rearm_desc_avx2(vaddr2, vaddr3);
+		}
 
 		/* flush desc with pa dma_addr */
-		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr0), dma_addr0_1);
-		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr1), dma_addr2_3);
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr0), reg0);
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr1), reg1);
 	}
 }
 #endif /* __AVX2__ */
 
 #ifdef __AVX512VL__
-/* AVX512 version for 16-byte descriptors, handles 8 buffers at a time */
+static __rte_always_inline __m512i
+_ci_rxq_rearm_desc_avx512(const __m128i vaddr0, const __m128i vaddr1,
+		const __m128i vaddr2, const __m128i vaddr3)
+{
+	const __m512i zero = _mm512_setzero_si512();
+	const __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+	__m256i vaddr0_1, vaddr2_3;
+	__m512i reg;
+
+	/**
+	 * merge 0 & 1, by casting 0 to 256-bit and inserting 1 into the high
+	 * lanes. Similarly for 2 & 3.
+	 */
+	vaddr0_1 =
+		_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+					vaddr1, 1);
+	vaddr2_3 =
+		_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+					vaddr3, 1);
+	/*
+	 * merge 0+1 & 2+3, by casting 0+1 to 512-bit and inserting 2+3 into the
+	 * high lanes.
+	 */
+	reg =
+		_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+					vaddr2_3, 1);
+
+	/* add headroom to address values */
+	reg = _mm512_add_epi64(reg, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+	/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+			offsetof(struct rte_mbuf, buf_addr) + 8);
+	/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+	reg = _mm512_unpackhi_epi64(reg, zero);
+#else
+	/* erase Header Buffer Address */
+	reg = _mm512_unpacklo_epi64(reg, zero);
+#endif
+	return reg;
+}
+
 static __rte_always_inline void
-_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq)
+_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq, const size_t desc_len)
 {
 	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
 	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
-	const size_t desc_len = 16;
+	/* how many descriptors can fit into a register */
+	const uint8_t desc_per_reg = sizeof(__m512i) / desc_len;
+	/* how many descriptors can fit into one loop iteration */
+	const uint8_t desc_per_iter = desc_per_reg * 2;
 	volatile void *rxdp;
 	int i;
-	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-	struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-	__m512i dma_addr0_3, dma_addr4_7;
-	__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-	__m512i zero = _mm512_setzero_si512();
 
 	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
 
-	/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-	for (i = 0; i < rearm_thresh; i += 8, rxp += 8, rxdp = RTE_PTR_ADD(rxdp, 8 * desc_len)) {
+	/* Initialize the mbufs in vector, process 4 or 8 mbufs in one loop */
+	for (i = 0; i < rearm_thresh;
+			i += desc_per_iter,
+			rxp += desc_per_iter,
+			rxdp = RTE_PTR_ADD(rxdp, desc_per_iter * desc_len)) {
 		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
-		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 4);
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-		__m256i vaddr0_1, vaddr2_3;
-		__m256i vaddr4_5, vaddr6_7;
-		__m512i vaddr0_3, vaddr4_7;
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * desc_per_reg);
+		__m512i reg0, reg1;
 
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-		mb2 = rxp[2].mbuf;
-		mb3 = rxp[3].mbuf;
-		mb4 = rxp[4].mbuf;
-		mb5 = rxp[5].mbuf;
-		mb6 = rxp[6].mbuf;
-		mb7 = rxp[7].mbuf;
+		if (desc_per_iter == 4) {
+			/* 16-byte descriptor, 16 byte zero, times four */
+			const __m128i zero = _mm_setzero_si128();
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
+			const struct rte_mbuf *mb2 = rxp[2].mbuf;
+			const struct rte_mbuf *mb3 = rxp[3].mbuf;
 
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-		vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-		vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-		vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-		vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+			const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+			const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
 
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3, and so on.
-		 */
-		vaddr0_1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-						vaddr1, 1);
-		vaddr2_3 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-						vaddr3, 1);
-		vaddr4_5 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-						vaddr5, 1);
-		vaddr6_7 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-						vaddr7, 1);
-		vaddr0_3 =
-			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-						vaddr2_3, 1);
-		vaddr4_7 =
-			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-						vaddr6_7, 1);
+			reg0 = _ci_rxq_rearm_desc_avx512(vaddr0, zero, vaddr1, zero);
+			reg1 = _ci_rxq_rearm_desc_avx512(vaddr2, zero, vaddr3, zero);
+		} else {
+			/* 16-byte descriptor times eight */
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
+			const struct rte_mbuf *mb2 = rxp[2].mbuf;
+			const struct rte_mbuf *mb3 = rxp[3].mbuf;
+			const struct rte_mbuf *mb4 = rxp[4].mbuf;
+			const struct rte_mbuf *mb5 = rxp[5].mbuf;
+			const struct rte_mbuf *mb6 = rxp[6].mbuf;
+			const struct rte_mbuf *mb7 = rxp[7].mbuf;
 
-		/* add headroom to address values */
-		vaddr0_3 = _mm512_add_epi64(vaddr0_3, hdr_room);
-		dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+			const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+			const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
+			const __m128i vaddr4 = _mm_loadu_si128((const __m128i *)&mb4->buf_addr);
+			const __m128i vaddr5 = _mm_loadu_si128((const __m128i *)&mb5->buf_addr);
+			const __m128i vaddr6 = _mm_loadu_si128((const __m128i *)&mb6->buf_addr);
+			const __m128i vaddr7 = _mm_loadu_si128((const __m128i *)&mb7->buf_addr);
 
-#if RTE_IOVA_IN_MBUF
-		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
-		dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, zero);
-		dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, zero);
-#else
-		/* erase Header Buffer Address */
-		dma_addr0_3 = _mm512_unpacklo_epi64(vaddr0_3, zero);
-		dma_addr4_7 = _mm512_unpacklo_epi64(vaddr4_7, zero);
-#endif
+			reg0 = _ci_rxq_rearm_desc_avx512(vaddr0, vaddr1, vaddr2, vaddr3);
+			reg1 = _ci_rxq_rearm_desc_avx512(vaddr4, vaddr5, vaddr6, vaddr7);
+		}
 
 		/* flush desc with pa dma_addr */
-		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr0), dma_addr0_3);
-		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr1), dma_addr4_7);
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr0), reg0);
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr1), reg1);
 	}
 }
 #endif /* __AVX512VL__ */
@@ -280,31 +315,26 @@ ci_rxq_rearm(struct ci_rx_queue *rxq, const size_t desc_len,
 	if (_ci_rxq_rearm_get_bufs(rxq, desc_len) < 0)
 		return;
 
-	if (desc_len == 16) {
-		switch (vec_level) {
-		case CI_RX_VEC_LEVEL_AVX512:
+	switch (vec_level) {
+	case CI_RX_VEC_LEVEL_AVX512:
 #ifdef __AVX512VL__
-			_ci_rxq_rearm_avx512(rxq);
-			break;
+		_ci_rxq_rearm_avx512(rxq, desc_len);
+		break;
 #else
-			/* fall back to AVX2 unless requested not to */
-			/* fall through */
+		/* fall back to AVX2 unless requested not to */
+		/* fall through */
 #endif
-		case CI_RX_VEC_LEVEL_AVX2:
+	case CI_RX_VEC_LEVEL_AVX2:
 #ifdef __AVX2__
-			_ci_rxq_rearm_avx2(rxq);
+			_ci_rxq_rearm_avx2(rxq, desc_len);
 			break;
 #else
 			/* fall back to SSE if AVX2 isn't supported */
 			/* fall through */
 #endif
-		case CI_RX_VEC_LEVEL_SSE:
-			_ci_rxq_rearm_sse(rxq, desc_len);
-			break;
-		}
-	} else {
-		/* for 32-byte descriptors only support SSE */
+	case CI_RX_VEC_LEVEL_SSE:
 		_ci_rxq_rearm_sse(rxq, desc_len);
+		break;
 	}
 
 	rxq->rxrearm_start += rearm_thresh;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v3 12/13] net/intel: add common Rx mbuf recycle
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (9 preceding siblings ...)
  2025-05-12 12:54   ` [PATCH v3 11/13] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
@ 2025-05-12 12:54   ` Anatoly Burakov
  2025-05-12 12:54   ` [PATCH v3 13/13] net/intel: add common Tx " Anatoly Burakov
                     ` (2 subsequent siblings)
  13 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 12:54 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes, Vladimir Medvedkin

Currently, there are duplicate implementations of Rx mbuf recycle in some
drivers, specifically ixgbe and i40e. Move them into a common header.

While we're at it, also support the no-IOVA-in-mbuf case.
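
As an illustration of intended usage (hypothetical example - this patch only
converts i40e and ixgbe), a driver adopting the helper is left with a one-line
wrapper that passes its own descriptor size:

    void
    ice_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
    {
            ci_rx_recycle_mbufs(rx_queue, nb_mbufs, sizeof(union ice_rx_flex_desc));
    }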

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/recycle_mbufs.h      | 67 +++++++++++++++++++
 .../i40e/i40e_recycle_mbufs_vec_common.c      | 37 +---------
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    | 35 +---------
 3 files changed, 73 insertions(+), 66 deletions(-)
 create mode 100644 drivers/net/intel/common/recycle_mbufs.h

diff --git a/drivers/net/intel/common/recycle_mbufs.h b/drivers/net/intel/common/recycle_mbufs.h
new file mode 100644
index 0000000000..fd31c5c1ff
--- /dev/null
+++ b/drivers/net/intel/common/recycle_mbufs.h
@@ -0,0 +1,67 @@
+#ifndef _COMMON_INTEL_RECYCLE_MBUFS_H_
+#define _COMMON_INTEL_RECYCLE_MBUFS_H_
+
+#include <stdint.h>
+#include <unistd.h>
+
+#include <rte_mbuf.h>
+#include <rte_io.h>
+#include <ethdev_driver.h>
+
+#include "rx.h"
+#include "tx.h"
+
+/**
+ * Recycle mbufs for Rx queue.
+ *
+ * @param rxq Rx queue pointer
+ * @param nb_mbufs number of mbufs to recycle
+ * @param desc_len length of Rx descriptor
+ */
+static __rte_always_inline void
+ci_rx_recycle_mbufs(struct ci_rx_queue *rxq, const uint16_t nb_mbufs,
+		const size_t desc_len)
+{
+	struct ci_rx_entry *rxep;
+	volatile void *rxdp;
+	uint16_t rx_id;
+	uint16_t i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+	rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	for (i = 0; i < nb_mbufs; i++) {
+		volatile uint64_t *cur = RTE_PTR_ADD(rxdp, i * desc_len);
+
+#if RTE_IOVA_IN_MBUF
+		const uint64_t paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
+		const uint64_t dma_addr = rte_cpu_to_le_64(paddr);
+#else
+		const uint64_t vaddr = (uintptr_t)rxep[i].mbuf->buf_addr +
+			RTE_PKTMBUF_HEADROOM;
+		const uint64_t dma_addr = rte_cpu_to_le_64(vaddr);
+#endif
+
+		/* 8 bytes PBA followed by 8 bytes HBA */
+		*(cur + 1) = 0;
+		*cur = dma_addr;
+	}
+
+	/* Update the descriptor initializer index */
+	rxq->rxrearm_start += nb_mbufs;
+	rx_id = rxq->rxrearm_start - 1;
+
+	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+		rxq->rxrearm_start = 0;
+		rx_id = rxq->nb_rx_desc - 1;
+	}
+
+	rxq->rxrearm_nb -= nb_mbufs;
+
+	rte_io_wmb();
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc_relaxed(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif
diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
index aa7703216d..073357bee2 100644
--- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
@@ -10,43 +10,12 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 
+#include "../common/recycle_mbufs.h"
+
 void
 i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct ci_rx_queue *rxq = rx_queue;
-	struct ci_rx_entry *rxep;
-	volatile union i40e_rx_desc *rxdp;
-	uint16_t rx_id;
-	uint64_t paddr;
-	uint64_t dma_addr;
-	uint16_t i;
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-	rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	for (i = 0; i < nb_mbufs; i++) {
-		/* Initialize rxdp descs. */
-		paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr = rte_cpu_to_le_64(paddr);
-		/* flush desc with pa dma_addr */
-		rxdp[i].read.hdr_addr = 0;
-		rxdp[i].read.pkt_addr = dma_addr;
-	}
-
-	/* Update the descriptor initializer index */
-	rxq->rxrearm_start += nb_mbufs;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= nb_mbufs;
-
-	rte_io_wmb();
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	ci_rx_recycle_mbufs(rx_queue, nb_mbufs, sizeof(union i40e_rx_desc));
 }
 
 uint16_t
diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index 1df1787c7f..e2c3523ed2 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -8,41 +8,12 @@
 #include "ixgbe_ethdev.h"
 #include "ixgbe_rxtx.h"
 
+#include "../common/recycle_mbufs.h"
+
 void
 ixgbe_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct ci_rx_queue *rxq = rx_queue;
-	struct ci_rx_entry *rxep;
-	volatile union ixgbe_adv_rx_desc *rxdp;
-	uint16_t rx_id;
-	uint64_t paddr;
-	uint64_t dma_addr;
-	uint16_t i;
-
-	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
-	rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	for (i = 0; i < nb_mbufs; i++) {
-		/* Initialize rxdp descs. */
-		paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr = rte_cpu_to_le_64(paddr);
-		/* Flush descriptors with pa dma_addr */
-		rxdp[i].read.hdr_addr = 0;
-		rxdp[i].read.pkt_addr = dma_addr;
-	}
-
-	/* Update the descriptor initializer index */
-	rxq->rxrearm_start += nb_mbufs;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= nb_mbufs;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			(rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	ci_rx_recycle_mbufs(rx_queue, nb_mbufs, sizeof(union ixgbe_adv_rx_desc));
 }
 
 uint16_t
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v3 13/13] net/intel: add common Tx mbuf recycle
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (10 preceding siblings ...)
  2025-05-12 12:54   ` [PATCH v3 12/13] net/intel: add common Rx mbuf recycle Anatoly Burakov
@ 2025-05-12 12:54   ` Anatoly Burakov
  2025-05-15 11:07     ` Bruce Richardson
  2025-05-12 12:58   ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Bruce Richardson
  2025-05-14 16:32   ` Bruce Richardson
  13 siblings, 1 reply; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-12 12:54 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes, Vladimir Medvedkin

Currently, there are duplicate implementations of Tx mbuf recycle in some
drivers, specifically ixgbe and i40e. Move them into a common header.
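
For context (not part of this patch), these driver callbacks are what back the
ethdev mbuf recycle API; an application drives them roughly as follows
(illustrative sketch, error handling omitted):

    struct rte_eth_recycle_rxq_info rxq_info;

    rte_eth_recycle_rx_queue_info_get(rx_port, rx_queue_id, &rxq_info);

    /* in the forwarding loop, alongside rte_eth_rx_burst()/rte_eth_tx_burst() */
    rte_eth_recycle_mbufs(rx_port, rx_queue_id, tx_port, tx_queue_id, &rxq_info);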

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/recycle_mbufs.h      | 98 +++++++++++++++++++
 drivers/net/intel/common/tx.h                 |  1 +
 .../i40e/i40e_recycle_mbufs_vec_common.c      | 88 +----------------
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    | 89 +----------------
 4 files changed, 107 insertions(+), 169 deletions(-)

diff --git a/drivers/net/intel/common/recycle_mbufs.h b/drivers/net/intel/common/recycle_mbufs.h
index fd31c5c1ff..88779c5aa4 100644
--- a/drivers/net/intel/common/recycle_mbufs.h
+++ b/drivers/net/intel/common/recycle_mbufs.h
@@ -64,4 +64,102 @@ ci_rx_recycle_mbufs(struct ci_rx_queue *rxq, const uint16_t nb_mbufs,
 	rte_write32_wc_relaxed(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
 }
 
+/**
+ * Recycle buffers on Tx. Note: the caller must first perform a driver-specific
+ * DD-bit-set check to ensure that the Tx descriptors are ready for recycling.
+ *
+ * @param txq Tx queue pointer
+ * @param recycle_rxq_info recycling mbuf information
+ *
+ * @return how many buffers were recycled
+ */
+static __rte_always_inline uint16_t
+ci_tx_recycle_mbufs(struct ci_tx_queue *txq,
+	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+	struct ci_tx_entry *txep;
+	struct rte_mbuf **rxep;
+	int i, n;
+	uint16_t nb_recycle_mbufs;
+	uint16_t avail = 0;
+	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
+	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
+	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
+	uint16_t refill_head = *recycle_rxq_info->refill_head;
+	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+
+	/* Get available recycling Rx buffers. */
+	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
+
+	/* Check Tx free thresh and Rx available space. */
+	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
+		return 0;
+
+	n = txq->tx_rs_thresh;
+	nb_recycle_mbufs = n;
+
+	/* Mbuf recycle mode does not support wrapping around the ring buffer.
+	 * There are two cases for this:
+	 *
+	 * case 1: The refill head of Rx buffer ring needs to be aligned with
+	 * mbuf ring size. In this case, the number of Tx freeing buffers
+	 * should be equal to refill_requirement.
+	 *
+	 * case 2: The refill head of Rx ring buffer does not need to be aligned
+	 * with mbuf ring size. In this case, the update of refill head can not
+	 * exceed the Rx mbuf ring size.
+	 */
+	if ((refill_requirement && refill_requirement != n) ||
+		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
+		return 0;
+
+	/* First buffer to free from S/W ring is at index
+	 * tx_next_dd - (tx_rs_thresh-1).
+	 */
+	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+	rxep = recycle_rxq_info->mbuf_ring;
+	rxep += refill_head;
+
+	/* is fast-free enabled in offloads? */
+	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+		/* Avoid txq containing buffers from unexpected mempool. */
+		if (unlikely(recycle_rxq_info->mp
+					!= txep[0].mbuf->pool))
+			return 0;
+
+		/* Directly put mbufs from Tx to Rx. */
+		for (i = 0; i < n; i++)
+			rxep[i] = txep[i].mbuf;
+	} else {
+		for (i = 0; i < n; i++) {
+			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+
+			/* If Tx buffers are not the last reference or from
+			 * unexpected mempool, previous copied buffers are
+			 * considered as invalid.
+			 */
+			if (unlikely(rxep[i] == NULL ||
+				recycle_rxq_info->mp != txep[i].mbuf->pool))
+				nb_recycle_mbufs = 0;
+		}
+		/* If Tx buffers are not the last reference or
+		 * from unexpected mempool, all recycled buffers
+		 * are put into mempool.
+		 */
+		if (nb_recycle_mbufs == 0)
+			for (i = 0; i < n; i++) {
+				if (rxep[i] != NULL)
+					rte_mempool_put(rxep[i]->pool, rxep[i]);
+			}
+	}
+
+	/* Update counters for Tx. */
+	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+	if (txq->tx_next_dd >= txq->nb_tx_desc)
+		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+	return nb_recycle_mbufs;
+}
+
 #endif
diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index c99bd5420f..cc70fa7db4 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -37,6 +37,7 @@ struct ci_tx_queue {
 		volatile struct ice_tx_desc *ice_tx_ring;
 		volatile struct idpf_base_tx_desc *idpf_tx_ring;
 		volatile union ixgbe_adv_tx_desc *ixgbe_tx_ring;
+		volatile void *tx_ring; /**< Generic. */
 	};
 	volatile uint8_t *qtx_tail;               /* register address of tail */
 	union {
diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
index 073357bee2..19edee781d 100644
--- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
@@ -23,92 +23,12 @@ i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
 	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
 	struct ci_tx_queue *txq = tx_queue;
-	struct ci_tx_entry *txep;
-	struct rte_mbuf **rxep;
-	int i, n;
-	uint16_t nb_recycle_mbufs;
-	uint16_t avail = 0;
-	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
-	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
-	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
-	uint16_t refill_head = *recycle_rxq_info->refill_head;
-	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+	const uint64_t ctob = txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz;
 
-	/* Get available recycling Rx buffers. */
-	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
-
-	/* Check Tx free thresh and Rx available space. */
-	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
-		return 0;
-
-	/* check DD bits on threshold descriptor */
-	if ((txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
-				rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+	/* are Tx descriptors ready for recycling? */
+	if ((ctob & rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
 			rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
 		return 0;
 
-	n = txq->tx_rs_thresh;
-	nb_recycle_mbufs = n;
-
-	/* Mbufs recycle mode can only support no ring buffer wrapping around.
-	 * Two case for this:
-	 *
-	 * case 1: The refill head of Rx buffer ring needs to be aligned with
-	 * mbuf ring size. In this case, the number of Tx freeing buffers
-	 * should be equal to refill_requirement.
-	 *
-	 * case 2: The refill head of Rx ring buffer does not need to be aligned
-	 * with mbuf ring size. In this case, the update of refill head can not
-	 * exceed the Rx mbuf ring size.
-	 */
-	if ((refill_requirement && refill_requirement != n) ||
-		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
-		return 0;
-
-	/* First buffer to free from S/W ring is at index
-	 * tx_next_dd - (tx_rs_thresh-1).
-	 */
-	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
-	rxep = recycle_rxq_info->mbuf_ring;
-	rxep += refill_head;
-
-	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
-		/* Avoid txq contains buffers from unexpected mempool. */
-		if (unlikely(recycle_rxq_info->mp
-					!= txep[0].mbuf->pool))
-			return 0;
-
-		/* Directly put mbufs from Tx to Rx. */
-		for (i = 0; i < n; i++)
-			rxep[i] = txep[i].mbuf;
-	} else {
-		for (i = 0; i < n; i++) {
-			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
-
-			/* If Tx buffers are not the last reference or from
-			 * unexpected mempool, previous copied buffers are
-			 * considered as invalid.
-			 */
-			if (unlikely(rxep[i] == NULL ||
-				recycle_rxq_info->mp != txep[i].mbuf->pool))
-				nb_recycle_mbufs = 0;
-		}
-		/* If Tx buffers are not the last reference or
-		 * from unexpected mempool, all recycled buffers
-		 * are put into mempool.
-		 */
-		if (nb_recycle_mbufs == 0)
-			for (i = 0; i < n; i++) {
-				if (rxep[i] != NULL)
-					rte_mempool_put(rxep[i]->pool, rxep[i]);
-			}
-	}
-
-	/* Update counters for Tx. */
-	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
-	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
-	if (txq->tx_next_dd >= txq->nb_tx_desc)
-		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-
-	return nb_recycle_mbufs;
+	return ci_tx_recycle_mbufs(txq, recycle_rxq_info);
 }
diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index e2c3523ed2..179205b422 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -21,92 +21,11 @@ ixgbe_recycle_tx_mbufs_reuse_vec(void *tx_queue,
 		struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
 	struct ci_tx_queue *txq = tx_queue;
-	struct ci_tx_entry *txep;
-	struct rte_mbuf **rxep;
-	int i, n;
-	uint32_t status;
-	uint16_t nb_recycle_mbufs;
-	uint16_t avail = 0;
-	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
-	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
-	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
-	uint16_t refill_head = *recycle_rxq_info->refill_head;
-	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+	const uint32_t status = txq->ixgbe_tx_ring[txq->tx_next_dd].wb.status;
 
-	/* Get available recycling Rx buffers. */
-	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
-
-	/* Check Tx free thresh and Rx available space. */
-	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
-		return 0;
-
-	/* check DD bits on threshold descriptor */
-	status = txq->ixgbe_tx_ring[txq->tx_next_dd].wb.status;
-	if (!(status & IXGBE_ADVTXD_STAT_DD))
-		return 0;
-
-	n = txq->tx_rs_thresh;
-	nb_recycle_mbufs = n;
-
-	/* Mbufs recycle can only support no ring buffer wrapping around.
-	 * Two case for this:
-	 *
-	 * case 1: The refill head of Rx buffer ring needs to be aligned with
-	 * buffer ring size. In this case, the number of Tx freeing buffers
-	 * should be equal to refill_requirement.
-	 *
-	 * case 2: The refill head of Rx ring buffer does not need to be aligned
-	 * with buffer ring size. In this case, the update of refill head can not
-	 * exceed the Rx buffer ring size.
-	 */
-	if ((refill_requirement && refill_requirement != n) ||
-		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
+	/* are Tx descriptors ready for recycling? */
+	if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
 		return 0;
 
-	/* First buffer to free from S/W ring is at index
-	 * tx_next_dd - (tx_rs_thresh-1).
-	 */
-	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
-	rxep = recycle_rxq_info->mbuf_ring;
-	rxep += refill_head;
-
-	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
-		/* Avoid txq contains buffers from unexpected mempool. */
-		if (unlikely(recycle_rxq_info->mp
-					!= txep[0].mbuf->pool))
-			return 0;
-
-		/* Directly put mbufs from Tx to Rx. */
-		for (i = 0; i < n; i++)
-			rxep[i] = txep[i].mbuf;
-	} else {
-		for (i = 0; i < n; i++) {
-			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
-
-			/* If Tx buffers are not the last reference or from
-			 * unexpected mempool, previous copied buffers are
-			 * considered as invalid.
-			 */
-			if (unlikely(rxep[i] == NULL ||
-				recycle_rxq_info->mp != txep[i].mbuf->pool))
-				nb_recycle_mbufs = 0;
-		}
-		/* If Tx buffers are not the last reference or
-		 * from unexpected mempool, all recycled buffers
-		 * are put into mempool.
-		 */
-		if (nb_recycle_mbufs == 0)
-			for (i = 0; i < n; i++) {
-				if (rxep[i] != NULL)
-					rte_mempool_put(rxep[i]->pool, rxep[i]);
-			}
-	}
-
-	/* Update counters for Tx. */
-	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
-	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
-	if (txq->tx_next_dd >= txq->nb_tx_desc)
-		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-
-	return nb_recycle_mbufs;
+	return ci_tx_recycle_mbufs(tx_queue, recycle_rxq_info);
 }
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (11 preceding siblings ...)
  2025-05-12 12:54   ` [PATCH v3 13/13] net/intel: add common Tx " Anatoly Burakov
@ 2025-05-12 12:58   ` Bruce Richardson
  2025-05-14 16:32   ` Bruce Richardson
  13 siblings, 0 replies; 82+ messages in thread
From: Bruce Richardson @ 2025-05-12 12:58 UTC (permalink / raw)
  To: Anatoly Burakov; +Cc: dev, Vladimir Medvedkin

On Mon, May 12, 2025 at 01:54:27PM +0100, Anatoly Burakov wrote:
> The `rdh` (read head) field in the `ixgbe_rx_queue` struct is not used
> anywhere in the codebase, and can be removed.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
>  drivers/net/intel/ixgbe/ixgbe_rxtx.c | 9 ++-------
>  drivers/net/intel/ixgbe/ixgbe_rxtx.h | 1 -
>  2 files changed, 2 insertions(+), 8 deletions(-)
> 
Hi Anatoly,

for any future versions, can you please include a cover letter with a
summary indicating which patches have changed and why. I hope to start
reviewing this patchset shortly.

Thanks,
/Bruce

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (12 preceding siblings ...)
  2025-05-12 12:58   ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Bruce Richardson
@ 2025-05-14 16:32   ` Bruce Richardson
  2025-05-15 11:15     ` Burakov, Anatoly
  13 siblings, 1 reply; 82+ messages in thread
From: Bruce Richardson @ 2025-05-14 16:32 UTC (permalink / raw)
  To: Anatoly Burakov; +Cc: dev, Vladimir Medvedkin

On Mon, May 12, 2025 at 01:54:27PM +0100, Anatoly Burakov wrote:
> The `rdh` (read head) field in the `ixgbe_rx_queue` struct is not used
> anywhere in the codebase, and can be removed.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>

With the one comment below fixed.

Acked-by: Bruce Richardson <bruce.richardson@intel.com>


> ---
>  drivers/net/intel/ixgbe/ixgbe_rxtx.c | 9 ++-------
>  drivers/net/intel/ixgbe/ixgbe_rxtx.h | 1 -
>  2 files changed, 2 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
> index 95c80ac1b8..0c07ce3186 100644
> --- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
> @@ -3296,17 +3296,12 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
>  	/*
>  	 * Modified to setup VFRDT for Virtual Function
>  	 */
> -	if (ixgbe_is_vf(dev)) {
> +	if (ixgbe_is_vf(dev))
>  		rxq->rdt_reg_addr =
>  			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
> -		rxq->rdh_reg_addr =
> -			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
> -	} else {
> +	else
>  		rxq->rdt_reg_addr =
>  			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
> -		rxq->rdh_reg_addr =
> -			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
> -	}
>  

Unrelated code change. Please remove from this patch in any subsequent
versions - it's not worth fixing this unless we are otherwise modifying
this function.

>  	rxq->rx_ring_phys_addr = rz->iova;
>  	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
> diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
> index 641f982b01..20a5c5a0af 100644
> --- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
> +++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
> @@ -85,7 +85,6 @@ struct ixgbe_rx_queue {
>  	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
>  	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
>  	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
> -	volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
>  	struct ixgbe_rx_entry *sw_ring; /**< address of RX software ring. */
>  	struct ixgbe_scattered_rx_entry *sw_sc_ring; /**< address of scattered Rx software ring. */
>  	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
> -- 
> 2.47.1
> 

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 02/13] net/iavf: make IPsec stats dynamically allocated
  2025-05-12 12:54   ` [PATCH v3 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
@ 2025-05-14 16:39     ` Bruce Richardson
  0 siblings, 0 replies; 82+ messages in thread
From: Bruce Richardson @ 2025-05-14 16:39 UTC (permalink / raw)
  To: Anatoly Burakov; +Cc: dev, Vladimir Medvedkin, Ian Stokes

On Mon, May 12, 2025 at 01:54:28PM +0100, Anatoly Burakov wrote:
> Currently, the stats structure is directly embedded in the queue structure.
> We're about to move iavf driver to a common Rx queue structure, so we can't
> have driver-specific structures that aren't pointers, inside the common
> queue structure. To prepare, we replace direct embedding into the queue
> structure with a pointer to the stats structure.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
>  drivers/net/intel/iavf/iavf_ethdev.c |  2 +-
>  drivers/net/intel/iavf/iavf_rxtx.c   | 21 ++++++++++++++++++---
>  drivers/net/intel/iavf/iavf_rxtx.h   |  2 +-
>  3 files changed, 20 insertions(+), 5 deletions(-)

Acked-by: Bruce Richardson <bruce.richardson@intel.com>



^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 03/13] net/ixgbe: create common Rx queue structure
  2025-05-12 12:54   ` [PATCH v3 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
@ 2025-05-14 16:45     ` Bruce Richardson
  0 siblings, 0 replies; 82+ messages in thread
From: Bruce Richardson @ 2025-05-14 16:45 UTC (permalink / raw)
  To: Anatoly Burakov; +Cc: dev, Vladimir Medvedkin

On Mon, May 12, 2025 at 01:54:29PM +0100, Anatoly Burakov wrote:
> In preparation for deduplication effort, generalize the Rx queue structure.
> 
> Most of the fields are simply moved to common/rx.h, clarifying the comments
> where necessary. There are some instances where the field is renamed when
> moving, to make it more consistent with the rest of the codebase.
> 
> Specifically, the following fields are renamed:
> 
> - rdt_reg_addr -> qrx_tail (Rx ring tail register address)
> - rx_using_sse -> vector_rx (indicates if vectorized path is enabled)
> - mb_pool -> mp (other drivers use this name)
> 
> Additionally, some per-driver defines are now also moved to aforementioned
> common Rx header, and re-defined in the driver using said common values.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
>  drivers/net/intel/common/rx.h                 |  62 ++++++++
>  drivers/net/intel/ixgbe/ixgbe_ethdev.c        |   8 +-
>  .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    |   8 +-
>  drivers/net/intel/ixgbe/ixgbe_rxtx.c          | 149 +++++++++---------
>  drivers/net/intel/ixgbe/ixgbe_rxtx.h          |  67 +-------
>  .../net/intel/ixgbe/ixgbe_rxtx_vec_common.h   |   4 +-
>  drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c |  22 +--
>  drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c  |  22 +--
>  8 files changed, 172 insertions(+), 170 deletions(-)
> 

For ease of git history review, I think I'd like this split into two
patches - one for the ixgbe changes to rename the fields, and a second
patch to just move the structure to common.

/Bruce


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 04/13] net/i40e: use the common Rx queue structure
  2025-05-12 12:54   ` [PATCH v3 04/13] net/i40e: use the " Anatoly Burakov
@ 2025-05-14 16:52     ` Bruce Richardson
  2025-05-15 11:09       ` Burakov, Anatoly
  0 siblings, 1 reply; 82+ messages in thread
From: Bruce Richardson @ 2025-05-14 16:52 UTC (permalink / raw)
  To: Anatoly Burakov; +Cc: dev, Ian Stokes

On Mon, May 12, 2025 at 01:54:30PM +0100, Anatoly Burakov wrote:
> Make the i40e driver use the new common Rx queue structure.
> 
> Because the i40e driver supports both 16-byte and 32-byte descriptor
> formats (controlled by RTE_LIBRTE_I40E_16BYTE_RX_DESC define), the common
> queue structure has to take that into account, so the ring queue structure
> will have both, while the actual descriptor format is picked by i40e at
> compile time using the above macro. Direct usage of Rx queue structure is
> now meant to be replaced with a macro access that takes descriptor size
> into account.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
>  drivers/net/intel/common/rx.h                 |  14 ++
>  drivers/net/intel/i40e/i40e_ethdev.c          |   4 +-
>  drivers/net/intel/i40e/i40e_ethdev.h          |   4 +-
>  drivers/net/intel/i40e/i40e_fdir.c            |  16 +--
>  .../i40e/i40e_recycle_mbufs_vec_common.c      |   6 +-
>  drivers/net/intel/i40e/i40e_rxtx.c            | 126 +++++++++---------
>  drivers/net/intel/i40e/i40e_rxtx.h            |  74 +++-------
>  drivers/net/intel/i40e/i40e_rxtx_common_avx.h |   6 +-
>  .../net/intel/i40e/i40e_rxtx_vec_altivec.c    |  22 +--
>  drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |  12 +-
>  drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |  12 +-
>  drivers/net/intel/i40e/i40e_rxtx_vec_common.h |   4 +-
>  drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   |  24 ++--
>  drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    |  24 ++--
>  14 files changed, 160 insertions(+), 188 deletions(-)
> 
> diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
> index 524de39f9c..db49db57d0 100644
> --- a/drivers/net/intel/common/rx.h
> +++ b/drivers/net/intel/common/rx.h
> @@ -30,6 +30,8 @@ struct ci_rx_queue {
>  	struct rte_mempool  *mp; /**< mbuf pool to populate RX ring. */
>  	union { /* RX ring virtual address */
>  		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
> +		volatile union i40e_16byte_rx_desc *i40e_rx_16b_ring;
> +		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;

Rather than doing this, would it not be better to keep using the current
scheme of just having an #ifdef aliasing i40e_rx_desc to either the 16byte
or 32byte variants?
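
For reference, the current aliasing in the i40e driver looks roughly like this
(sketch, exact location in the headers may differ):

    #ifdef RTE_LIBRTE_I40E_16BYTE_RX_DESC
    #define i40e_rx_desc i40e_16byte_rx_desc
    #else
    #define i40e_rx_desc i40e_32byte_rx_desc
    #endif

which would let the common union keep a single
"volatile union i40e_rx_desc *i40e_rx_ring" member.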

/Bruce

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 05/13] net/ice: use the common Rx queue structure
  2025-05-12 12:54   ` [PATCH v3 05/13] net/ice: " Anatoly Burakov
@ 2025-05-14 16:56     ` Bruce Richardson
  2025-05-23 11:16       ` Burakov, Anatoly
  0 siblings, 1 reply; 82+ messages in thread
From: Bruce Richardson @ 2025-05-14 16:56 UTC (permalink / raw)
  To: Anatoly Burakov; +Cc: dev

On Mon, May 12, 2025 at 01:54:31PM +0100, Anatoly Burakov wrote:
> Make the ice driver use the new common Rx queue structure.
> 
> Because the ice driver supports both 16-byte and 32-byte descriptor
> formats (controlled by RTE_LIBRTE_ICE_16BYTE_RX_DESC define), the common
> queue structure has to take that into account, so the ring queue
> structure will have both, while the actual descriptor format is picked by
> ice at compile time using the above macro. Direct usage of Rx queue
> structure is now meant to be replaced with a macro access that takes
> descriptor size into account.

I'd have the same comment as on the previous patch. I think it would be
better to not have to use a macro at each reference, but have the struct
type aliased as is done now.

> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
> 
> Notes:
>     v2:
>     - Make xtr_field_offs of type ptrdiff_t instead of off_t to fix 32-bit compile
>       issues
> 
>  drivers/net/intel/common/rx.h               |  23 +++
>  drivers/net/intel/ice/ice_dcf.c             |   6 +-
>  drivers/net/intel/ice/ice_dcf_ethdev.c      |  22 +--
>  drivers/net/intel/ice/ice_ethdev.c          |   2 +-
>  drivers/net/intel/ice/ice_ethdev.h          |   5 +-
>  drivers/net/intel/ice/ice_rxtx.c            | 158 ++++++++++----------
>  drivers/net/intel/ice/ice_rxtx.h            |  78 ++--------
>  drivers/net/intel/ice/ice_rxtx_common_avx.h |   6 +-
>  drivers/net/intel/ice/ice_rxtx_vec_avx2.c   |  14 +-
>  drivers/net/intel/ice/ice_rxtx_vec_avx512.c |  16 +-
>  drivers/net/intel/ice/ice_rxtx_vec_common.h |   6 +-
>  drivers/net/intel/ice/ice_rxtx_vec_sse.c    |  22 +--
>  12 files changed, 164 insertions(+), 194 deletions(-)
> 
> diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
> index db49db57d0..9a691971bc 100644
> --- a/drivers/net/intel/common/rx.h
> +++ b/drivers/net/intel/common/rx.h
> @@ -5,6 +5,7 @@
>  #ifndef _COMMON_INTEL_RX_H_
>  #define _COMMON_INTEL_RX_H_
>  
> +#include <stddef.h>
>  #include <stdint.h>
>  #include <unistd.h>
>  #include <rte_mbuf.h>
> @@ -12,6 +13,7 @@
>  
>  #define CI_RX_BURST 32
>  #define CI_RX_MAX_BURST 32
> +#define CI_RX_MAX_NSEG 2
>  
>  struct ci_rx_queue;
>  
> @@ -23,6 +25,8 @@ struct ci_rx_entry_sc {
>  	struct rte_mbuf *fbuf; /* First segment of the fragmented packet.*/
>  };
>  
> +typedef void (*ci_rx_release_mbufs_t)(struct ci_rx_queue *rxq);
> +
>  /**
>   * Structure associated with each RX queue.
>   */
> @@ -32,6 +36,8 @@ struct ci_rx_queue {
>  		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
>  		volatile union i40e_16byte_rx_desc *i40e_rx_16b_ring;
>  		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
> +		volatile union ice_16b_rx_flex_desc *ice_rx_16b_ring;
> +		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
>  	};
>  	volatile uint8_t *qrx_tail;   /**< register address of tail */
>  	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
> @@ -64,10 +70,16 @@ struct ci_rx_queue {
>  	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
>  	uint64_t mbuf_initializer; /**< value to init mbufs */
>  	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
> +	uint32_t rxdid; /**< RX descriptor format ID. */
> +	uint32_t proto_xtr; /* protocol extraction type */
> +	uint64_t xtr_ol_flag; /* flexible descriptor metadata extraction offload flag */
> +	ptrdiff_t xtr_field_offs; /* Protocol extraction matedata offset*/
> +	uint64_t hw_time_update; /**< Last time HW timestamp was updated */

Just to confirm - these are not in the ice-specific section because they
are also used by iavf?

>  	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
>  	struct rte_mbuf fake_mbuf;
>  	union { /* the VSI this queue belongs to */
>  		struct i40e_vsi *i40e_vsi;
> +		struct ice_vsi *ice_vsi;
>  	};

<snip>

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 07/13] net/intel: generalize vectorized Rx rearm
  2025-05-12 12:54   ` [PATCH v3 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
@ 2025-05-15 10:56     ` Bruce Richardson
  0 siblings, 0 replies; 82+ messages in thread
From: Bruce Richardson @ 2025-05-15 10:56 UTC (permalink / raw)
  To: Anatoly Burakov; +Cc: dev

On Mon, May 12, 2025 at 01:54:33PM +0100, Anatoly Burakov wrote:
> There is certain amount of duplication between various drivers when it
> comes to Rx ring rearm. This patch takes implementation from ice driver
> as a base because it has support for no IOVA in mbuf as well as all
> vector implementations, and moves them to a common file.
> 
> The driver Rx rearm code used copious amounts of #ifdef-ery to
> discriminate between 16- and 32-byte descriptor support, but we cannot do
> that in the common code because we will not have access to those
> definitions. So, instead, we use copious amounts of compile-time constant

I was initially wondering why we don't have access to the definitions, but
then I realised it was because the common code doesn't know whether to use
the I40E, ICE or IAVF definition. :-)
However, this leads me to consider whether, if we need to keep these
definitions, we are better to just use a single one, rather than one per
driver.
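
(If we do consolidate, that could be a single build-time switch shared by the
common code - name purely hypothetical - e.g.:

    #ifdef RTE_NET_INTEL_USE_16BYTE_DESC
    /* ... 16-byte descriptor definitions and paths ... */
    #endif

instead of the per-driver *_16BYTE_RX_DESC macros.)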

> propagation and force-inlining to ensure that the compiler generates
> effectively the same code it generated back when it was in the driver. We
> also add a compile-time definition for vectorization levels for x86
> vector instructions to discriminate between different instruction sets.
> This too is constant-propagated, and thus should not affect performance.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>

More comments inline below. While I realise this is mostly a copy-paste
transfer, I think we can do some cleanup in the process.

/Bruce

> ---
>  drivers/net/intel/common/rx.h               |   3 +
>  drivers/net/intel/common/rx_vec_sse.h       | 323 ++++++++++++++++++++
>  drivers/net/intel/ice/ice_rxtx.h            |   2 +-
>  drivers/net/intel/ice/ice_rxtx_common_avx.h | 233 --------------
>  drivers/net/intel/ice/ice_rxtx_vec_avx2.c   |   5 +-
>  drivers/net/intel/ice/ice_rxtx_vec_avx512.c |   5 +-
>  drivers/net/intel/ice/ice_rxtx_vec_sse.c    |  77 +----
>  7 files changed, 336 insertions(+), 312 deletions(-)
>  create mode 100644 drivers/net/intel/common/rx_vec_sse.h
>  delete mode 100644 drivers/net/intel/ice/ice_rxtx_common_avx.h
> 
> diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
> index 2d9328ae89..65e920fdd1 100644
> --- a/drivers/net/intel/common/rx.h
> +++ b/drivers/net/intel/common/rx.h
> @@ -14,6 +14,8 @@
>  #define CI_RX_BURST 32
>  #define CI_RX_MAX_BURST 32
>  #define CI_RX_MAX_NSEG 2
> +#define CI_VPMD_DESCS_PER_LOOP 4

Do all our vector PMDs use the same DESC_PER_LOOP value? Do the AVX2 and
AVX512 paths not do 8 at a time?

> +#define CI_VPMD_RX_REARM_THRESH 64
>  
>  struct ci_rx_queue;
>  
> @@ -40,6 +42,7 @@ struct ci_rx_queue {
>  		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
>  		volatile union iavf_16byte_rx_desc *iavf_rx_16b_ring;
>  		volatile union iavf_32byte_rx_desc *iavf_rx_32b_ring;
> +		volatile void *rx_ring; /**< Generic */
>  	};
>  	volatile uint8_t *qrx_tail;   /**< register address of tail */
>  	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
> diff --git a/drivers/net/intel/common/rx_vec_sse.h b/drivers/net/intel/common/rx_vec_sse.h
> new file mode 100644
> index 0000000000..6fe0baf38b
> --- /dev/null
> +++ b/drivers/net/intel/common/rx_vec_sse.h

This file should be called "rx_vec_x86.h", I think, since it has got both
SSE and AVX code in it.

> @@ -0,0 +1,323 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2024 Intel Corporation

Date -> 2025

> + */
> +
> +#ifndef _COMMON_INTEL_RX_VEC_SSE_H_
> +#define _COMMON_INTEL_RX_VEC_SSE_H_
> +
> +#include <stdint.h>
> +
> +#include <ethdev_driver.h>
> +#include <rte_io.h>
> +
> +#include "rx.h"
> +
> +enum ci_rx_vec_level {
> +	CI_RX_VEC_LEVEL_SSE = 0,
> +	CI_RX_VEC_LEVEL_AVX2,
> +	CI_RX_VEC_LEVEL_AVX512,
> +};
> +
> +static inline int
> +_ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq, const size_t desc_len)
> +{
> +	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
> +	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
> +	volatile void *rxdp;
> +	int i;
> +
> +	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
> +
> +	if (rte_mempool_get_bulk(rxq->mp,
> +				 (void **)rxp,
> +				 rearm_thresh) < 0) {

Since we are copying the code to a new place, maybe we can lengthen the
lines a bit to the 100 char limit. I suspect this can be all on one line,
increasing readability.
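
i.e. something like (illustrative):

    if (rte_mempool_get_bulk(rxq->mp, (void **)rxp, rearm_thresh) < 0) {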

> +		if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) {
> +			__m128i dma_addr0;
> +
> +			dma_addr0 = _mm_setzero_si128();
> +			for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) {
> +				rxp[i].mbuf = &rxq->fake_mbuf;
> +				const void *ptr = RTE_PTR_ADD(rxdp, i * desc_len);

If we drop the const here, the cast should not be necessary at all in the
line below, I think.
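
i.e. (sketch):

    void *ptr = RTE_PTR_ADD(rxdp, i * desc_len);
    ...
    _mm_store_si128(ptr, dma_addr0);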

> +				_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr),
> +						dma_addr0);
> +			}
> +		}
> +		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += rearm_thresh;
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +/*
> + * SSE code path can handle both 16-byte and 32-byte descriptors with one code
> + * path, as we only ever write 16 bytes at a time.
> + */
> +static __rte_always_inline void
> +_ci_rxq_rearm_sse(struct ci_rx_queue *rxq, const size_t desc_len)
> +{
> +	const __m128i hdr_room = _mm_set1_epi64x(RTE_PKTMBUF_HEADROOM);

Minor nit, but we are referring to this as "headroom" not "header-room" so
the prefix should probably be "hd_room" (or hdroom), not "hdr_room" :-)

> +	const __m128i zero = _mm_setzero_si128();
> +	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
> +	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
> +	volatile void *rxdp;
> +	int i;
> +
> +	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
> +
> +	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
> +	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp = RTE_PTR_ADD(rxdp, 2 * desc_len)) {
> +		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
> +		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len);

We don't need the volatile casts here, since we only ever cast them away
when used with store_si128. In fact, I suspect we don't ever need to have
rxdp be volatile either.

> +		__m128i vaddr0, vaddr1;
> +		__m128i dma_addr0, dma_addr1;
> +		struct rte_mbuf *mb0, *mb1;
> +
> +		mb0 = rxp[0].mbuf;
> +		mb1 = rxp[1].mbuf;
> +
> +#if RTE_IOVA_IN_MBUF
> +		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
> +		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
> +				offsetof(struct rte_mbuf, buf_addr) + 8);
> +#endif
> +		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
> +		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
> +
> +		/* add headroom to address values */
> +		vaddr0 = _mm_add_epi64(vaddr0, hdr_room);
> +		vaddr1 = _mm_add_epi64(vaddr1, hdr_room);
> +
> +#if RTE_IOVA_IN_MBUF
> +		/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
> +		dma_addr0 = _mm_unpackhi_epi64(vaddr0, zero);
> +		dma_addr1 = _mm_unpackhi_epi64(vaddr1, zero);
> +#else
> +		/* erase Header Buffer Address */
> +		dma_addr0 = _mm_unpacklo_epi64(vaddr0, zero);
> +		dma_addr1 = _mm_unpacklo_epi64(vaddr1, zero);
> +#endif
> +
> +		/* flush desc with pa dma_addr */
> +		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr0), dma_addr0);
> +		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr1), dma_addr1);
> +	}
> +}
> +
> +#ifdef __AVX2__
> +/* AVX2 version for 16-byte descriptors, handles 4 buffers at a time */
> +static __rte_always_inline void
> +_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq)
> +{
> +	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
> +	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
> +	const size_t desc_len = 16;
> +	volatile void *rxdp;
> +	const __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
> +	const __m256i zero = _mm256_setzero_si256();
> +	int i;
> +
> +	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
> +
> +	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
> +	for (i = 0; i < rearm_thresh; i += 4, rxp += 4, rxdp = RTE_PTR_ADD(rxdp, 4 * desc_len)) {
> +		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
> +		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 2);

Again, we can drop volatile, I think.

> +		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
> +		__m256i vaddr0_1, vaddr2_3;
> +		__m256i dma_addr0_1, dma_addr2_3;
> +		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
> +
> +		mb0 = rxp[0].mbuf;
> +		mb1 = rxp[1].mbuf;
> +		mb2 = rxp[2].mbuf;
> +		mb3 = rxp[3].mbuf;
> +
> +#if RTE_IOVA_IN_MBUF
> +		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
> +		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
> +				offsetof(struct rte_mbuf, buf_addr) + 8);
> +#endif
> +		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
> +		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
> +		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
> +		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
> +
> +		/**
> +		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
> +		 * into the high lanes. Similarly for 2 & 3
> +		 */
> +		vaddr0_1 =
> +			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
> +						vaddr1, 1);

Can these statements now fit on a single 100-character line?
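For reference, at this indentation the single-line form stays under 100 characters:

		vaddr0_1 = _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0), vaddr1, 1);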

> +		vaddr2_3 =
> +			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
> +						vaddr3, 1);
> +
> +		/* add headroom to address values */
> +		vaddr0_1 = _mm256_add_epi64(vaddr0_1, hdr_room);
> +		vaddr2_3 = _mm256_add_epi64(vaddr2_3, hdr_room);
> +
> +#if RTE_IOVA_IN_MBUF
> +		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
> +		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, zero);
> +		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, zero);
> +#else
> +		/* erase Header Buffer Address */
> +		dma_addr0_1 = _mm256_unpacklo_epi64(vaddr0_1, zero);
> +		dma_addr2_3 = _mm256_unpacklo_epi64(vaddr2_3, zero);
> +#endif
> +
> +		/* flush desc with pa dma_addr */
> +		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr0), dma_addr0_1);
> +		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr1), dma_addr2_3);
> +	}
> +}
> +#endif /* __AVX2__ */
> +
> +#ifdef __AVX512VL__
> +/* AVX512 version for 16-byte descriptors, handles 8 buffers at a time */
> +static __rte_always_inline void
> +_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq)
> +{
> +	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
> +	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
> +	const size_t desc_len = 16;
> +	volatile void *rxdp;
> +	int i;
> +	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
> +	struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
> +	__m512i dma_addr0_3, dma_addr4_7;
> +	__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
> +	__m512i zero = _mm512_setzero_si512();
> +
> +	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
> +
> +	/* Initialize the mbufs in vector, process 8 mbufs in one loop */
> +	for (i = 0; i < rearm_thresh; i += 8, rxp += 8, rxdp = RTE_PTR_ADD(rxdp, 8 * desc_len)) {
> +		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
> +		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 4);
> +		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
> +		__m128i vaddr4, vaddr5, vaddr6, vaddr7;
> +		__m256i vaddr0_1, vaddr2_3;
> +		__m256i vaddr4_5, vaddr6_7;
> +		__m512i vaddr0_3, vaddr4_7;

Rather than defining all of these variables here, many of which are
throw-away, we could define them on first use: the code will be shorter, just
as readable, and the variables can also be made "const".
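For example, the first pair could look something like this (sketch only):

		struct rte_mbuf *mb0 = rxp[0].mbuf;
		struct rte_mbuf *mb1 = rxp[1].mbuf;

		const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
		const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
		const __m256i vaddr0_1 =
			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0), vaddr1, 1);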

> +
> +		mb0 = rxp[0].mbuf;
> +		mb1 = rxp[1].mbuf;
> +		mb2 = rxp[2].mbuf;
> +		mb3 = rxp[3].mbuf;
> +		mb4 = rxp[4].mbuf;
> +		mb5 = rxp[5].mbuf;
> +		mb6 = rxp[6].mbuf;
> +		mb7 = rxp[7].mbuf;
> +
> +#if RTE_IOVA_IN_MBUF
> +		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
> +		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
> +				offsetof(struct rte_mbuf, buf_addr) + 8);
> +#endif
> +		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
> +		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
> +		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
> +		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
> +		vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
> +		vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
> +		vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
> +		vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
> +
> +		/**
> +		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
> +		 * into the high lanes. Similarly for 2 & 3, and so on.
> +		 */
> +		vaddr0_1 =
> +			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
> +						vaddr1, 1);
> +		vaddr2_3 =
> +			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
> +						vaddr3, 1);
> +		vaddr4_5 =
> +			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
> +						vaddr5, 1);
> +		vaddr6_7 =
> +			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
> +						vaddr7, 1);
> +		vaddr0_3 =
> +			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
> +						vaddr2_3, 1);
> +		vaddr4_7 =
> +			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
> +						vaddr6_7, 1);
> +
> +		/* add headroom to address values */
> +		vaddr0_3 = _mm512_add_epi64(vaddr0_3, hdr_room);
> +		vaddr4_7 = _mm512_add_epi64(vaddr4_7, hdr_room);
> +
> +#if RTE_IOVA_IN_MBUF
> +		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
> +		dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, zero);
> +		dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, zero);
> +#else
> +		/* erase Header Buffer Address */
> +		dma_addr0_3 = _mm512_unpacklo_epi64(vaddr0_3, zero);
> +		dma_addr4_7 = _mm512_unpacklo_epi64(vaddr4_7, zero);
> +#endif
> +
> +		/* flush desc with pa dma_addr */
> +		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr0), dma_addr0_3);
> +		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr1), dma_addr4_7);
> +	}
> +}
> +#endif /* __AVX512VL__ */
> +
> +static __rte_always_inline void
> +ci_rxq_rearm(struct ci_rx_queue *rxq, const size_t desc_len,
> +		const enum ci_rx_vec_level vec_level)
> +{
> +	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
> +	uint16_t rx_id;
> +
> +	/* Pull 'n' more MBUFs into the software ring */
> +	if (_ci_rxq_rearm_get_bufs(rxq, desc_len) < 0)
> +		return;
> +
> +	if (desc_len == 16) {
> +		switch (vec_level) {
> +		case CI_RX_VEC_LEVEL_AVX512:
> +#ifdef __AVX512VL__
> +			_ci_rxq_rearm_avx512(rxq);
> +			break;
> +#else
> +			/* fall back to AVX2 unless requested not to */
> +			/* fall through */
> +#endif
> +		case CI_RX_VEC_LEVEL_AVX2:
> +#ifdef __AVX2__
> +			_ci_rxq_rearm_avx2(rxq);
> +			break;
> +#else
> +			/* fall back to SSE if AVX2 isn't supported */
> +			/* fall through */
> +#endif
> +		case CI_RX_VEC_LEVEL_SSE:
> +			_ci_rxq_rearm_sse(rxq, desc_len);
> +			break;
> +		}
> +	} else {
> +		/* for 32-byte descriptors only support SSE */
> +		_ci_rxq_rearm_sse(rxq, desc_len);
> +	}
> +
> +	rxq->rxrearm_start += rearm_thresh;
> +	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
> +		rxq->rxrearm_start = 0;
> +
> +	rxq->rxrearm_nb -= rearm_thresh;
> +
> +	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
> +			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
> +
> +	/* Update the tail pointer on the NIC */
> +	rte_write32_wc(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
> +}
> +
> +#endif /* _COMMON_INTEL_RX_VEC_SSE_H_ */

<snip>

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 08/13] net/i40e: use common Rx rearm code
  2025-05-12 12:54   ` [PATCH v3 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
@ 2025-05-15 10:58     ` Bruce Richardson
  0 siblings, 0 replies; 82+ messages in thread
From: Bruce Richardson @ 2025-05-15 10:58 UTC (permalink / raw)
  To: Anatoly Burakov; +Cc: dev, Ian Stokes

On Mon, May 12, 2025 at 01:54:34PM +0100, Anatoly Burakov wrote:
> The i40e driver has an implementation of vectorized mbuf rearm code that
> is identical to the one in the common code, so just use that.
> 
> In addition, the i40e driver has an implementation of Rx queue rearm for the
> Neon instruction set, so create a common header for Neon implementations too,
> and use that in the i40e Neon code.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
> 
> Notes:
>     v2:
>     - Fix compile issues on Arm64
> 
>  drivers/net/intel/common/rx_vec_neon.h        | 131 +++++++++++
>  drivers/net/intel/i40e/i40e_rxtx.h            |   2 +-
>  drivers/net/intel/i40e/i40e_rxtx_common_avx.h | 215 ------------------
>  drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |   5 +-
>  drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |   5 +-
>  drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   |  59 +----
>  drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    |  70 +-----
>  7 files changed, 144 insertions(+), 343 deletions(-)
>  create mode 100644 drivers/net/intel/common/rx_vec_neon.h
>  delete mode 100644 drivers/net/intel/i40e/i40e_rxtx_common_avx.h
> 
> diff --git a/drivers/net/intel/common/rx_vec_neon.h b/drivers/net/intel/common/rx_vec_neon.h
> new file mode 100644
> index 0000000000..d79802b1c0
> --- /dev/null
> +++ b/drivers/net/intel/common/rx_vec_neon.h
> @@ -0,0 +1,131 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2024 Intel Corporation
> + */
> +
> +#ifndef _COMMON_INTEL_RX_VEC_NEON_H_
> +#define _COMMON_INTEL_RX_VEC_NEON_H_
> +
> +#include <stdint.h>
> +
> +#include <ethdev_driver.h>
> +#include <rte_io.h>
> +#include <rte_vect.h>
> +
> +#include "rx.h"
> +
> +static inline int
> +_ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq, const size_t desc_len)
> +{
> +	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
> +	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
> +	volatile void *rxdp;
> +	int i;
> +
> +	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
> +
> +	if (rte_mempool_get_bulk(rxq->mp,
> +				 (void **)rxp,
> +				 rearm_thresh) < 0) {
> +		if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) {
> +			uint64x2_t zero = vdupq_n_u64(0);
> +
> +			for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) {
> +				rxp[i].mbuf = &rxq->fake_mbuf;
> +				const void *ptr = RTE_PTR_ADD(rxdp, i * desc_len);
> +				vst1q_u64(RTE_CAST_PTR(uint64_t *, ptr), zero);

I suspect many comments on the previous patch around the SSE code, e.g.
about unnecessary casting, may be relevant to this patch also.

/Bruce


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 06/13] net/iavf: use the common Rx queue structure
  2025-05-12 12:54   ` [PATCH v3 06/13] net/iavf: " Anatoly Burakov
@ 2025-05-15 10:59     ` Bruce Richardson
  2025-05-15 11:11       ` Burakov, Anatoly
  0 siblings, 1 reply; 82+ messages in thread
From: Bruce Richardson @ 2025-05-15 10:59 UTC (permalink / raw)
  To: Anatoly Burakov; +Cc: dev, Vladimir Medvedkin, Ian Stokes

On Mon, May 12, 2025 at 01:54:32PM +0100, Anatoly Burakov wrote:
> Make the iavf driver use the new common Rx queue structure.
> 
> Because the iavf driver supports both 16-byte and 32-byte descriptor
> formats (controlled by RTE_LIBRTE_IAVF_16BYTE_RX_DESC define), the common
> queue structure has to take that into account, so the ring queue structure
> will have both, while the actual descriptor format is picked by iavf at
> compile time using the above macro. Direct usage of Rx queue structure is
> now meant to be replaced with a macro access that takes descriptor size
> into account.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
> 
> Notes:
>     v2:
>     - Fix compile issues for Arm
> 
>  drivers/net/intel/common/rx.h                 |  12 ++
>  drivers/net/intel/iavf/iavf.h                 |   4 +-
>  drivers/net/intel/iavf/iavf_ethdev.c          |  12 +-
>  drivers/net/intel/iavf/iavf_rxtx.c            | 192 +++++++++---------
>  drivers/net/intel/iavf/iavf_rxtx.h            |  76 ++-----
>  drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   |  24 +--
>  drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c |  22 +-
>  drivers/net/intel/iavf/iavf_rxtx_vec_common.h |  27 ++-
>  drivers/net/intel/iavf/iavf_rxtx_vec_neon.c   |  30 +--
>  drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    |  46 ++---
>  drivers/net/intel/iavf/iavf_vchnl.c           |   6 +-
>  11 files changed, 207 insertions(+), 244 deletions(-)
> 
> diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
> index 9a691971bc..2d9328ae89 100644
> --- a/drivers/net/intel/common/rx.h
> +++ b/drivers/net/intel/common/rx.h
> @@ -38,6 +38,8 @@ struct ci_rx_queue {
>  		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
>  		volatile union ice_16b_rx_flex_desc *ice_rx_16b_ring;
>  		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
> +		volatile union iavf_16byte_rx_desc *iavf_rx_16b_ring;
> +		volatile union iavf_32byte_rx_desc *iavf_rx_32b_ring;
>  	};

Aren't the descriptors used by iavf the same as those used by i40e, and the
flex descriptors used by it the same as those used by ice? If so, we should
look to merge their definitions.

/Bruce


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 13/13] net/intel: add common Tx mbuf recycle
  2025-05-12 12:54   ` [PATCH v3 13/13] net/intel: add common Tx " Anatoly Burakov
@ 2025-05-15 11:07     ` Bruce Richardson
  0 siblings, 0 replies; 82+ messages in thread
From: Bruce Richardson @ 2025-05-15 11:07 UTC (permalink / raw)
  To: Anatoly Burakov; +Cc: dev, Vladimir Medvedkin

On Mon, May 12, 2025 at 01:54:39PM +0100, Anatoly Burakov wrote:
> Currently, there are duplicate implementations of Tx mbuf recycle in some
> drivers, specifically ixgbe and i40e. Move them into a common header.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
>  drivers/net/intel/common/recycle_mbufs.h      | 98 +++++++++++++++++++
>  drivers/net/intel/common/tx.h                 |  1 +
>  .../i40e/i40e_recycle_mbufs_vec_common.c      | 88 +----------------
>  .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    | 89 +----------------
>  4 files changed, 107 insertions(+), 169 deletions(-)
> 
> diff --git a/drivers/net/intel/common/recycle_mbufs.h b/drivers/net/intel/common/recycle_mbufs.h
> index fd31c5c1ff..88779c5aa4 100644
> --- a/drivers/net/intel/common/recycle_mbufs.h
> +++ b/drivers/net/intel/common/recycle_mbufs.h
> @@ -64,4 +64,102 @@ ci_rx_recycle_mbufs(struct ci_rx_queue *rxq, const uint16_t nb_mbufs,
>  	rte_write32_wc_relaxed(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
>  }
>  
> +/**
> + * Recycle buffers on Tx. Note: the function must first perform a driver-specific
> + * DD-bit-set check to ensure that the Tx descriptors are ready for recycling.
> + *
> + * @param txq Tx queue pointer
> + * @param recycle_rxq_info recycling mbuf information
> + *
> + * @return how many buffers were recycled
> + */
> +static __rte_always_inline uint16_t
> +ci_tx_recycle_mbufs(struct ci_tx_queue *txq,
> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
> +{
> +	struct ci_tx_entry *txep;
> +	struct rte_mbuf **rxep;
> +	int i, n;
> +	uint16_t nb_recycle_mbufs;
> +	uint16_t avail = 0;
> +	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
> +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> +	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
> +	uint16_t refill_head = *recycle_rxq_info->refill_head;
> +	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
> +
> +	/* Get available recycling Rx buffers. */
> +	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
> +
> +	/* Check Tx free thresh and Rx available space. */
> +	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
> +		return 0;
> +
> +	n = txq->tx_rs_thresh;
> +	nb_recycle_mbufs = n;
> +
> +	/* Mbufs recycle mode can only support no ring buffer wrapping around.
> +	 * Two case for this:
> +	 *
> +	 * case 1: The refill head of Rx buffer ring needs to be aligned with
> +	 * mbuf ring size. In this case, the number of Tx freeing buffers
> +	 * should be equal to refill_requirement.
> +	 *
> +	 * case 2: The refill head of Rx ring buffer does not need to be aligned
> +	 * with mbuf ring size. In this case, the update of refill head can not
> +	 * exceed the Rx mbuf ring size.
> +	 */
> +	if ((refill_requirement && refill_requirement != n) ||
> +		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
> +		return 0;
> +
> +	/* First buffer to free from S/W ring is at index
> +	 * tx_next_dd - (tx_rs_thresh-1).
> +	 */
> +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> +	rxep = recycle_rxq_info->mbuf_ring;
> +	rxep += refill_head;
> +
> +	/* is fast-free enabled in offloads? */
> +	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> +		/* Avoid txq containing buffers from unexpected mempool. */
> +		if (unlikely(recycle_rxq_info->mp
> +					!= txep[0].mbuf->pool))
> +			return 0;
> +
> +		/* Directly put mbufs from Tx to Rx. */
> +		for (i = 0; i < n; i++)
> +			rxep[i] = txep[i].mbuf;
> +	} else {
> +		for (i = 0; i < n; i++) {
> +			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> +
> +			/* If Tx buffers are not the last reference or from
> +			 * unexpected mempool, previous copied buffers are
> +			 * considered as invalid.
> +			 */
> +			if (unlikely(rxep[i] == NULL ||
> +				recycle_rxq_info->mp != txep[i].mbuf->pool))
> +				nb_recycle_mbufs = 0;
> +		}
> +		/* If Tx buffers are not the last reference or
> +		 * from unexpected mempool, all recycled buffers
> +		 * are put into mempool.
> +		 */
> +		if (nb_recycle_mbufs == 0)
> +			for (i = 0; i < n; i++) {
> +				if (rxep[i] != NULL)
> +					rte_mempool_put(rxep[i]->pool, rxep[i]);
> +			}
> +	}
> +
> +	/* Update counters for Tx. */
> +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
> +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
> +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> +		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> +
> +	return nb_recycle_mbufs;
> +}
> +
>  #endif
> diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
> index c99bd5420f..cc70fa7db4 100644
> --- a/drivers/net/intel/common/tx.h
> +++ b/drivers/net/intel/common/tx.h
> @@ -37,6 +37,7 @@ struct ci_tx_queue {
>  		volatile struct ice_tx_desc *ice_tx_ring;
>  		volatile struct idpf_base_tx_desc *idpf_tx_ring;
>  		volatile union ixgbe_adv_tx_desc *ixgbe_tx_ring;
> +		volatile void *tx_ring; /**< Generic. */
>  	};
>  	volatile uint8_t *qtx_tail;               /* register address of tail */
>  	union {
> diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
> index 073357bee2..19edee781d 100644
> --- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
> +++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
> @@ -23,92 +23,12 @@ i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
>  	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
>  {
>  	struct ci_tx_queue *txq = tx_queue;
> -	struct ci_tx_entry *txep;
> -	struct rte_mbuf **rxep;
> -	int i, n;
> -	uint16_t nb_recycle_mbufs;
> -	uint16_t avail = 0;
> -	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
> -	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> -	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
> -	uint16_t refill_head = *recycle_rxq_info->refill_head;
> -	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
> +	const uint64_t ctob = txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz;
>  
> -	/* Get available recycling Rx buffers. */
> -	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
> -
> -	/* Check Tx free thresh and Rx available space. */
> -	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
> -		return 0;
> -
> -	/* check DD bits on threshold descriptor */
> -	if ((txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
> -				rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> +	/* are Tx descriptors ready for recycling? */
> +	if ((ctob & rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
>  			rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
>  		return 0;

There is the function i40e_tx_desc_done (and similar functions for other
drivers) to do this check. In the Tx cleanup code we pass that function in
as a callback - you could probably shorten things a little by doing the same
here. Due to inlining, the indirect function call is eliminated by the
compiler (via constant propagation), so there is no performance penalty.
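As a sketch of what the caller side might then look like (assuming the common
function takes the DD-check as an extra parameter, the way the Tx free path
does - this is illustrative, not the actual common-code signature):

	uint16_t
	i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
		struct rte_eth_recycle_rxq_info *recycle_rxq_info)
	{
		return ci_tx_recycle_mbufs(tx_queue, i40e_tx_desc_done, recycle_rxq_info);
	}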

/Bruce

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 04/13] net/i40e: use the common Rx queue structure
  2025-05-14 16:52     ` Bruce Richardson
@ 2025-05-15 11:09       ` Burakov, Anatoly
  2025-05-15 12:55         ` Bruce Richardson
  0 siblings, 1 reply; 82+ messages in thread
From: Burakov, Anatoly @ 2025-05-15 11:09 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: dev, Ian Stokes

On 5/14/2025 6:52 PM, Bruce Richardson wrote:
> On Mon, May 12, 2025 at 01:54:30PM +0100, Anatoly Burakov wrote:
>> Make the i40e driver use the new common Rx queue structure.
>>
>> Because the i40e driver supports both 16-byte and 32-byte descriptor
>> formats (controlled by RTE_LIBRTE_I40E_16BYTE_RX_DESC define), the common
>> queue structure has to take that into account, so the ring queue structure
>> will have both, while the actual descriptor format is picked by i40e at
>> compile time using the above macro. Direct usage of Rx queue structure is
>> now meant to be replaced with a macro access that takes descriptor size
>> into account.
>>
>> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
>> ---
>>   drivers/net/intel/common/rx.h                 |  14 ++
>>   drivers/net/intel/i40e/i40e_ethdev.c          |   4 +-
>>   drivers/net/intel/i40e/i40e_ethdev.h          |   4 +-
>>   drivers/net/intel/i40e/i40e_fdir.c            |  16 +--
>>   .../i40e/i40e_recycle_mbufs_vec_common.c      |   6 +-
>>   drivers/net/intel/i40e/i40e_rxtx.c            | 126 +++++++++---------
>>   drivers/net/intel/i40e/i40e_rxtx.h            |  74 +++-------
>>   drivers/net/intel/i40e/i40e_rxtx_common_avx.h |   6 +-
>>   .../net/intel/i40e/i40e_rxtx_vec_altivec.c    |  22 +--
>>   drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |  12 +-
>>   drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |  12 +-
>>   drivers/net/intel/i40e/i40e_rxtx_vec_common.h |   4 +-
>>   drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   |  24 ++--
>>   drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    |  24 ++--
>>   14 files changed, 160 insertions(+), 188 deletions(-)
>>
>> diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
>> index 524de39f9c..db49db57d0 100644
>> --- a/drivers/net/intel/common/rx.h
>> +++ b/drivers/net/intel/common/rx.h
>> @@ -30,6 +30,8 @@ struct ci_rx_queue {
>>   	struct rte_mempool  *mp; /**< mbuf pool to populate RX ring. */
>>   	union { /* RX ring virtual address */
>>   		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
>> +		volatile union i40e_16byte_rx_desc *i40e_rx_16b_ring;
>> +		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
> 
> Rather than doing this, would it not be better to keep using the current
> scheme of just having an #ifdef aliasing i40e_rx_desc to either the 16byte
> or 32byte variants?
> 
> /Bruce

I had that in my first iteration, but I don't really like having 
driver-specific #ifdefs in a common header. Plus, it also introduces 
header ordering dependencies (need to include header with definitions 
before including this one), so I decided against it.

-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 06/13] net/iavf: use the common Rx queue structure
  2025-05-15 10:59     ` Bruce Richardson
@ 2025-05-15 11:11       ` Burakov, Anatoly
  2025-05-15 12:57         ` Bruce Richardson
  0 siblings, 1 reply; 82+ messages in thread
From: Burakov, Anatoly @ 2025-05-15 11:11 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: dev, Vladimir Medvedkin, Ian Stokes

On 5/15/2025 12:59 PM, Bruce Richardson wrote:
> On Mon, May 12, 2025 at 01:54:32PM +0100, Anatoly Burakov wrote:
>> Make the iavf driver use the new common Rx queue structure.
>>
>> Because the iavf driver supports both 16-byte and 32-byte descriptor
>> formats (controlled by RTE_LIBRTE_IAVF_16BYTE_RX_DESC define), the common
>> queue structure has to take that into account, so the ring queue structure
>> will have both, while the actual descriptor format is picked by iavf at
>> compile time using the above macro. Direct usage of Rx queue structure is
>> now meant to be replaced with a macro access that takes descriptor size
>> into account.
>>
>> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
>> ---
>>
>> Notes:
>>      v2:
>>      - Fix compile issues for Arm
>>
>>   drivers/net/intel/common/rx.h                 |  12 ++
>>   drivers/net/intel/iavf/iavf.h                 |   4 +-
>>   drivers/net/intel/iavf/iavf_ethdev.c          |  12 +-
>>   drivers/net/intel/iavf/iavf_rxtx.c            | 192 +++++++++---------
>>   drivers/net/intel/iavf/iavf_rxtx.h            |  76 ++-----
>>   drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   |  24 +--
>>   drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c |  22 +-
>>   drivers/net/intel/iavf/iavf_rxtx_vec_common.h |  27 ++-
>>   drivers/net/intel/iavf/iavf_rxtx_vec_neon.c   |  30 +--
>>   drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    |  46 ++---
>>   drivers/net/intel/iavf/iavf_vchnl.c           |   6 +-
>>   11 files changed, 207 insertions(+), 244 deletions(-)
>>
>> diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
>> index 9a691971bc..2d9328ae89 100644
>> --- a/drivers/net/intel/common/rx.h
>> +++ b/drivers/net/intel/common/rx.h
>> @@ -38,6 +38,8 @@ struct ci_rx_queue {
>>   		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
>>   		volatile union ice_16b_rx_flex_desc *ice_rx_16b_ring;
>>   		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
>> +		volatile union iavf_16byte_rx_desc *iavf_rx_16b_ring;
>> +		volatile union iavf_32byte_rx_desc *iavf_rx_32b_ring;
>>   	};
> 
> Aren't the descriptors used by iavf the same as those used by i40e, and the
> flex descriptors used by it the same as those used by ice? If so, we should
> look to merge their definitions.
> 

They're coming from base code. We can do that, but I'm not sure how that 
would play with the rest of the driver and compatibility between ethdev 
parts and calling into base code.

> /Bruce
> 


-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct
  2025-05-14 16:32   ` Bruce Richardson
@ 2025-05-15 11:15     ` Burakov, Anatoly
  2025-05-15 12:58       ` Bruce Richardson
  0 siblings, 1 reply; 82+ messages in thread
From: Burakov, Anatoly @ 2025-05-15 11:15 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: dev, Vladimir Medvedkin

On 5/14/2025 6:32 PM, Bruce Richardson wrote:
> On Mon, May 12, 2025 at 01:54:27PM +0100, Anatoly Burakov wrote:
>> The `rdh` (read head) field in the `ixgbe_rx_queue` struct is not used
>> anywhere in the codebase, and can be removed.
>>
>> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> 
> With the one comment below fixed.
> 
> Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> 
> 
>> ---
>>   drivers/net/intel/ixgbe/ixgbe_rxtx.c | 9 ++-------
>>   drivers/net/intel/ixgbe/ixgbe_rxtx.h | 1 -
>>   2 files changed, 2 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
>> index 95c80ac1b8..0c07ce3186 100644
>> --- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
>> +++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
>> @@ -3296,17 +3296,12 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
>>   	/*
>>   	 * Modified to setup VFRDT for Virtual Function
>>   	 */
>> -	if (ixgbe_is_vf(dev)) {
>> +	if (ixgbe_is_vf(dev))
>>   		rxq->rdt_reg_addr =
>>   			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
>> -		rxq->rdh_reg_addr =
>> -			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
>> -	} else {
>> +	else
>>   		rxq->rdt_reg_addr =
>>   			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
>> -		rxq->rdh_reg_addr =
>> -			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
>> -	}
>>   
> 
> Unrelated code change. Please remove from this patch in any subsequent
> versions - it's not worth fixing this unless we are otherwise modifying
> this function.

It was removed because we either keep rdh_reg_addr in the common Rx queue
structure, or we get rid of both the field and all its usages. If we
don't remove this, we'll have to include rdh_reg_addr in the common Rx queue
structure, and since it's not used anywhere except here, it would be
pretty pointless to do so.

> 
>>   	rxq->rx_ring_phys_addr = rz->iova;
>>   	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
>> diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
>> index 641f982b01..20a5c5a0af 100644
>> --- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
>> +++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
>> @@ -85,7 +85,6 @@ struct ixgbe_rx_queue {
>>   	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
>>   	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
>>   	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
>> -	volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
>>   	struct ixgbe_rx_entry *sw_ring; /**< address of RX software ring. */
>>   	struct ixgbe_scattered_rx_entry *sw_sc_ring; /**< address of scattered Rx software ring. */
>>   	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
>> -- 
>> 2.47.1
>>


-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 04/13] net/i40e: use the common Rx queue structure
  2025-05-15 11:09       ` Burakov, Anatoly
@ 2025-05-15 12:55         ` Bruce Richardson
  0 siblings, 0 replies; 82+ messages in thread
From: Bruce Richardson @ 2025-05-15 12:55 UTC (permalink / raw)
  To: Burakov, Anatoly; +Cc: dev, Ian Stokes

On Thu, May 15, 2025 at 01:09:31PM +0200, Burakov, Anatoly wrote:
> On 5/14/2025 6:52 PM, Bruce Richardson wrote:
> > On Mon, May 12, 2025 at 01:54:30PM +0100, Anatoly Burakov wrote:
> > > Make the i40e driver use the new common Rx queue structure.
> > > 
> > > Because the i40e driver supports both 16-byte and 32-byte descriptor
> > > formats (controlled by RTE_LIBRTE_I40E_16BYTE_RX_DESC define), the common
> > > queue structure has to take that into account, so the ring queue structure
> > > will have both, while the actual descriptor format is picked by i40e at
> > > compile time using the above macro. Direct usage of Rx queue structure is
> > > now meant to be replaced with a macro access that takes descriptor size
> > > into account.
> > > 
> > > Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> > > ---
> > >   drivers/net/intel/common/rx.h                 |  14 ++
> > >   drivers/net/intel/i40e/i40e_ethdev.c          |   4 +-
> > >   drivers/net/intel/i40e/i40e_ethdev.h          |   4 +-
> > >   drivers/net/intel/i40e/i40e_fdir.c            |  16 +--
> > >   .../i40e/i40e_recycle_mbufs_vec_common.c      |   6 +-
> > >   drivers/net/intel/i40e/i40e_rxtx.c            | 126 +++++++++---------
> > >   drivers/net/intel/i40e/i40e_rxtx.h            |  74 +++-------
> > >   drivers/net/intel/i40e/i40e_rxtx_common_avx.h |   6 +-
> > >   .../net/intel/i40e/i40e_rxtx_vec_altivec.c    |  22 +--
> > >   drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |  12 +-
> > >   drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |  12 +-
> > >   drivers/net/intel/i40e/i40e_rxtx_vec_common.h |   4 +-
> > >   drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   |  24 ++--
> > >   drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    |  24 ++--
> > >   14 files changed, 160 insertions(+), 188 deletions(-)
> > > 
> > > diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
> > > index 524de39f9c..db49db57d0 100644
> > > --- a/drivers/net/intel/common/rx.h
> > > +++ b/drivers/net/intel/common/rx.h
> > > @@ -30,6 +30,8 @@ struct ci_rx_queue {
> > >   	struct rte_mempool  *mp; /**< mbuf pool to populate RX ring. */
> > >   	union { /* RX ring virtual address */
> > >   		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
> > > +		volatile union i40e_16byte_rx_desc *i40e_rx_16b_ring;
> > > +		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
> > 
> > Rather than doing this, would it not be better to keep using the current
> > scheme of just having an #ifdef aliasing i40e_rx_desc to either the 16byte
> > or 32byte variants?
> > 
> > /Bruce
> 
> I had that in my first iteration, but I don't really like having
> driver-specific #ifdefs in a common header. Plus, it also introduces header
> ordering dependencies (need to include header with definitions before
> including this one), so I decided against it.
> 
The header ordering should not be a problem. The definition placeholder for
the 16-byte descriptors is in the global rte_config.h.
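For reference, the existing aliasing in the i40e header is roughly just:

	#ifdef RTE_LIBRTE_I40E_16BYTE_RX_DESC
	#define i40e_rx_desc i40e_16byte_rx_desc
	#else
	#define i40e_rx_desc i40e_32byte_rx_desc
	#endif

so the common structure could keep a single i40e ring pointer of the aliased type.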

/Bruce

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 06/13] net/iavf: use the common Rx queue structure
  2025-05-15 11:11       ` Burakov, Anatoly
@ 2025-05-15 12:57         ` Bruce Richardson
  0 siblings, 0 replies; 82+ messages in thread
From: Bruce Richardson @ 2025-05-15 12:57 UTC (permalink / raw)
  To: Burakov, Anatoly; +Cc: dev, Vladimir Medvedkin, Ian Stokes

On Thu, May 15, 2025 at 01:11:03PM +0200, Burakov, Anatoly wrote:
> On 5/15/2025 12:59 PM, Bruce Richardson wrote:
> > On Mon, May 12, 2025 at 01:54:32PM +0100, Anatoly Burakov wrote:
> > > Make the iavf driver use the new common Rx queue structure.
> > > 
> > > Because the iavf driver supports both 16-byte and 32-byte descriptor
> > > formats (controlled by RTE_LIBRTE_IAVF_16BYTE_RX_DESC define), the common
> > > queue structure has to take that into account, so the ring queue structure
> > > will have both, while the actual descriptor format is picked by iavf at
> > > compile time using the above macro. Direct usage of Rx queue structure is
> > > now meant to be replaced with a macro access that takes descriptor size
> > > into account.
> > > 
> > > Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> > > ---
> > > 
> > > Notes:
> > >      v2:
> > >      - Fix compile issues for Arm
> > > 
> > >   drivers/net/intel/common/rx.h                 |  12 ++
> > >   drivers/net/intel/iavf/iavf.h                 |   4 +-
> > >   drivers/net/intel/iavf/iavf_ethdev.c          |  12 +-
> > >   drivers/net/intel/iavf/iavf_rxtx.c            | 192 +++++++++---------
> > >   drivers/net/intel/iavf/iavf_rxtx.h            |  76 ++-----
> > >   drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   |  24 +--
> > >   drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c |  22 +-
> > >   drivers/net/intel/iavf/iavf_rxtx_vec_common.h |  27 ++-
> > >   drivers/net/intel/iavf/iavf_rxtx_vec_neon.c   |  30 +--
> > >   drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    |  46 ++---
> > >   drivers/net/intel/iavf/iavf_vchnl.c           |   6 +-
> > >   11 files changed, 207 insertions(+), 244 deletions(-)
> > > 
> > > diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
> > > index 9a691971bc..2d9328ae89 100644
> > > --- a/drivers/net/intel/common/rx.h
> > > +++ b/drivers/net/intel/common/rx.h
> > > @@ -38,6 +38,8 @@ struct ci_rx_queue {
> > >   		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
> > >   		volatile union ice_16b_rx_flex_desc *ice_rx_16b_ring;
> > >   		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
> > > +		volatile union iavf_16byte_rx_desc *iavf_rx_16b_ring;
> > > +		volatile union iavf_32byte_rx_desc *iavf_rx_32b_ring;
> > >   	};
> > 
> > Aren't the descriptors used by iavf the same as those used by i40e, and the
> > flex descriptors used by it the same as those used by ice? If so, we should
> > look to merge their definitions.
> > 
> 
> They're coming from base code. We can do that, but I'm not sure how that
> would play with the rest of the driver and compatibility between ethdev
> parts and calling into base code.
> 
Hmmm, for the base code, we could just add a typedef for iavf_rx_desc to
i40e_rx_desc in the osdep.h header.
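Perhaps something along these lines (illustrative only - a macro alias rather
than a literal typedef, so existing "union iavf_..._rx_desc" references keep
compiling unchanged):

	#define iavf_16byte_rx_desc i40e_16byte_rx_desc
	#define iavf_32byte_rx_desc i40e_32byte_rx_desc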

/Bruce

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct
  2025-05-15 11:15     ` Burakov, Anatoly
@ 2025-05-15 12:58       ` Bruce Richardson
  0 siblings, 0 replies; 82+ messages in thread
From: Bruce Richardson @ 2025-05-15 12:58 UTC (permalink / raw)
  To: Burakov, Anatoly; +Cc: dev, Vladimir Medvedkin

On Thu, May 15, 2025 at 01:15:43PM +0200, Burakov, Anatoly wrote:
> On 5/14/2025 6:32 PM, Bruce Richardson wrote:
> > On Mon, May 12, 2025 at 01:54:27PM +0100, Anatoly Burakov wrote:
> > > The `rdh` (read head) field in the `ixgbe_rx_queue` struct is not used
> > > anywhere in the codebase, and can be removed.
> > > 
> > > Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> > 
> > With the one comment below fixed.
> > 
> > Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> > 
> > 
> > > ---
> > >   drivers/net/intel/ixgbe/ixgbe_rxtx.c | 9 ++-------
> > >   drivers/net/intel/ixgbe/ixgbe_rxtx.h | 1 -
> > >   2 files changed, 2 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
> > > index 95c80ac1b8..0c07ce3186 100644
> > > --- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
> > > +++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
> > > @@ -3296,17 +3296,12 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
> > >   	/*
> > >   	 * Modified to setup VFRDT for Virtual Function
> > >   	 */
> > > -	if (ixgbe_is_vf(dev)) {
> > > +	if (ixgbe_is_vf(dev))
> > >   		rxq->rdt_reg_addr =
> > >   			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
> > > -		rxq->rdh_reg_addr =
> > > -			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
> > > -	} else {
> > > +	else
> > >   		rxq->rdt_reg_addr =
> > >   			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
> > > -		rxq->rdh_reg_addr =
> > > -			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
> > > -	}
> > 
> > Unrelated code change. Please remove from this patch in any subsequent
> > versions - it's not worth fixing this unless we are otherwise modifying
> > this function.
> 
> It was removed because we either keep rdh_reg_addr in common Rx queue
> structure, or we get rid of both the field and all its usages. If we don't
> remove this, we'll have to include rdh_reg_addr in common Rx queue structure
> and since it's not used anywhere except here it would be pretty pointless to
> do so.
> 
My mistake. Yes, this is a proper change belonging here, sorry.

/Bruce

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH v3 05/13] net/ice: use the common Rx queue structure
  2025-05-14 16:56     ` Bruce Richardson
@ 2025-05-23 11:16       ` Burakov, Anatoly
  0 siblings, 0 replies; 82+ messages in thread
From: Burakov, Anatoly @ 2025-05-23 11:16 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: dev

On 5/14/2025 6:56 PM, Bruce Richardson wrote:
> On Mon, May 12, 2025 at 01:54:31PM +0100, Anatoly Burakov wrote:
>> Make the ice driver use the new common Rx queue structure.
>>
>> Because the ice driver supports both 16-byte and 32-byte descriptor
>> formats (controlled by RTE_LIBRTE_ICE_16BYTE_RX_DESC define), the common
>> queue structure has to take that into account, so the ring queue
>> structure will have both, while the actual descriptor format is picked by
>> ice at compile time using the above macro. Direct usage of Rx queue
>> structure is now meant to be replaced with a macro access that takes
>> descriptor size into account.
> 
> I'd have the same comment as on the previous patch. I think it would be
> better to not have to use a macro at each reference, but have the struct
> type aliased as is done now.
> 
>>
>> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
>> ---
>>
>> Notes:
>>      v2:
>>      - Make xtr_field_offs of type ptrdiff_t instead of off_t to fix 32-bit compile
>>        issues
>>
>>   drivers/net/intel/common/rx.h               |  23 +++
>>   drivers/net/intel/ice/ice_dcf.c             |   6 +-
>>   drivers/net/intel/ice/ice_dcf_ethdev.c      |  22 +--
>>   drivers/net/intel/ice/ice_ethdev.c          |   2 +-
>>   drivers/net/intel/ice/ice_ethdev.h          |   5 +-
>>   drivers/net/intel/ice/ice_rxtx.c            | 158 ++++++++++----------
>>   drivers/net/intel/ice/ice_rxtx.h            |  78 ++--------
>>   drivers/net/intel/ice/ice_rxtx_common_avx.h |   6 +-
>>   drivers/net/intel/ice/ice_rxtx_vec_avx2.c   |  14 +-
>>   drivers/net/intel/ice/ice_rxtx_vec_avx512.c |  16 +-
>>   drivers/net/intel/ice/ice_rxtx_vec_common.h |   6 +-
>>   drivers/net/intel/ice/ice_rxtx_vec_sse.c    |  22 +--
>>   12 files changed, 164 insertions(+), 194 deletions(-)
>>
>> diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
>> index db49db57d0..9a691971bc 100644
>> --- a/drivers/net/intel/common/rx.h
>> +++ b/drivers/net/intel/common/rx.h
>> @@ -5,6 +5,7 @@
>>   #ifndef _COMMON_INTEL_RX_H_
>>   #define _COMMON_INTEL_RX_H_
>>   
>> +#include <stddef.h>
>>   #include <stdint.h>
>>   #include <unistd.h>
>>   #include <rte_mbuf.h>
>> @@ -12,6 +13,7 @@
>>   
>>   #define CI_RX_BURST 32
>>   #define CI_RX_MAX_BURST 32
>> +#define CI_RX_MAX_NSEG 2
>>   
>>   struct ci_rx_queue;
>>   
>> @@ -23,6 +25,8 @@ struct ci_rx_entry_sc {
>>   	struct rte_mbuf *fbuf; /* First segment of the fragmented packet.*/
>>   };
>>   
>> +typedef void (*ci_rx_release_mbufs_t)(struct ci_rx_queue *rxq);
>> +
>>   /**
>>    * Structure associated with each RX queue.
>>    */
>> @@ -32,6 +36,8 @@ struct ci_rx_queue {
>>   		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
>>   		volatile union i40e_16byte_rx_desc *i40e_rx_16b_ring;
>>   		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
>> +		volatile union ice_16b_rx_flex_desc *ice_rx_16b_ring;
>> +		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
>>   	};
>>   	volatile uint8_t *qrx_tail;   /**< register address of tail */
>>   	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
>> @@ -64,10 +70,16 @@ struct ci_rx_queue {
>>   	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
>>   	uint64_t mbuf_initializer; /**< value to init mbufs */
>>   	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
>> +	uint32_t rxdid; /**< RX descriptor format ID. */
>> +	uint32_t proto_xtr; /* protocol extraction type */
>> +	uint64_t xtr_ol_flag; /* flexible descriptor metadata extraction offload flag */
>> +	ptrdiff_t xtr_field_offs; /* Protocol extraction metadata offset */
>> +	uint64_t hw_time_update; /**< Last time HW timestamp was updated */
> 
> Just to confirm - these are not in the ice-specific section because they
> are also used by iavf?

Yes, it is for that reason. Any fields moved to common parts are present
in multiple drivers. There will be some instances where driver-specific
fields do the same things slightly differently (a prime example is
timestamping support); this can be future work.

-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 00/25] Intel PMD drivers Rx cleanup
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (13 preceding siblings ...)
  2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
@ 2025-05-30 13:56 ` Anatoly Burakov
  2025-05-30 13:56   ` [PATCH v4 01/25] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                     ` (24 more replies)
  14 siblings, 25 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:56 UTC (permalink / raw)
  To: dev; +Cc: bruce.richardson

There is a certain amount of duplication between the ixgbe, i40e, ice, and
iavf drivers. This patchset specifically focuses on laying the groundwork for
deduplicating the Rx side of things - providing common queue definitions,
using common Rx descriptor formats, and picking some low-hanging fruit in
terms of Rx code deduplication, including the vectorized implementations.

The first bunch of patches focus on small cleanups:

- variable renames
- definition renames
- using one definition for 16-byte descriptors instead of per-driver ones
- fixing some cruft in IXGBE headers

Starting with patch 15 comes the meat of the patchset:

- creating common Rx queue structure
- unifying Rx rearm code to make use of new unified structures and definitions
- expanding Rx rearm code to have SSE, AVX2, and AVX512 implementations

At the end, there are a couple of patches that unify Rx and Tx mbuf recycle
for some drivers, but more work is needed on that front because we only touch
two drivers in each case - the others do have them, but they're implemented
differently from the ones that were deduplicated.

v3 -> v4:
- Added a lot of cleanups to the patchset
- Moved to using unified Rx descriptor definitions
- Using a single compilation flag to support 16-byte descriptors
- Fixes for whitespace, syntax, etc.

Anatoly Burakov (25):
  net/ixgbe: remove unused field in Rx queue struct
  net/iavf: make IPsec stats dynamically allocated
  net/ixgbe: match variable names to other drivers
  net/i40e: match variable name to other drivers
  net/ice: match variable name to other drivers
  net/i40e: rename 16-byte descriptor define
  net/ice: rename 16-byte descriptor define
  net/iavf: rename 16-byte descriptor define
  net/ixgbe: simplify vector PMD compilation
  net/ixgbe: replace always-true check
  net/ixgbe: clean up definitions
  net/i40e: clean up definitions
  net/ice: clean up definitions
  net/iavf: clean up definitions
  net/ixgbe: create common Rx queue structure
  net/i40e: use the common Rx queue structure
  net/ice: use the common Rx queue structure
  net/iavf: use the common Rx queue structure
  net/intel: generalize vectorized Rx rearm
  net/i40e: use common Rx rearm code
  net/iavf: use common Rx rearm code
  net/ixgbe: use common Rx rearm code
  net/intel: support wider x86 vectors for Rx rearm
  net/intel: add common Rx mbuf recycle
  net/intel: add common Tx mbuf recycle

 app/test-pmd/config.c                         |   4 +-
 config/rte_config.h                           |   2 +-
 doc/guides/nics/i40e.rst                      |   4 +-
 drivers/net/intel/common/desc.h               | 157 +++++++++
 .../recycle_mbufs.h}                          |  78 +++--
 drivers/net/intel/common/rx.h                 | 119 ++++++-
 drivers/net/intel/common/rx_vec_arm.h         | 105 ++++++
 drivers/net/intel/common/rx_vec_ppc.h         | 121 +++++++
 drivers/net/intel/common/rx_vec_x86.h         | 330 ++++++++++++++++++
 drivers/net/intel/i40e/i40e_ethdev.c          |   4 +-
 drivers/net/intel/i40e/i40e_ethdev.h          |   4 +-
 drivers/net/intel/i40e/i40e_fdir.c            |  18 +-
 drivers/net/intel/i40e/i40e_pf.c              |   2 +-
 .../i40e/i40e_recycle_mbufs_vec_common.c      | 127 +------
 drivers/net/intel/i40e/i40e_rxtx.c            | 194 +++++-----
 drivers/net/intel/i40e/i40e_rxtx.h            |  87 +----
 drivers/net/intel/i40e/i40e_rxtx_common_avx.h | 215 ------------
 .../net/intel/i40e/i40e_rxtx_vec_altivec.c    | 135 ++-----
 drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |  59 ++--
 drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |  59 ++--
 drivers/net/intel/i40e/i40e_rxtx_vec_common.h |   4 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   | 124 ++-----
 drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    | 130 ++-----
 drivers/net/intel/iavf/iavf.h                 |   4 +-
 drivers/net/intel/iavf/iavf_ethdev.c          |  13 +-
 drivers/net/intel/iavf/iavf_rxtx.c            | 265 +++++++-------
 drivers/net/intel/iavf/iavf_rxtx.h            | 169 +--------
 drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   |  91 +++--
 drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c |  85 +++--
 drivers/net/intel/iavf/iavf_rxtx_vec_common.h | 218 +-----------
 drivers/net/intel/iavf/iavf_rxtx_vec_neon.c   |  80 +----
 drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    | 148 +++-----
 drivers/net/intel/iavf/iavf_vchnl.c           |   8 +-
 drivers/net/intel/ice/ice_dcf.c               |   7 +-
 drivers/net/intel/ice/ice_dcf_ethdev.c        |  27 +-
 drivers/net/intel/ice/ice_ethdev.c            |   2 +-
 drivers/net/intel/ice/ice_ethdev.h            |   4 +-
 drivers/net/intel/ice/ice_rxtx.c              | 222 ++++++------
 drivers/net/intel/ice/ice_rxtx.h              |  92 +----
 drivers/net/intel/ice/ice_rxtx_common_avx.h   | 233 -------------
 drivers/net/intel/ice/ice_rxtx_vec_avx2.c     |  45 ++-
 drivers/net/intel/ice/ice_rxtx_vec_avx512.c   |  53 ++-
 drivers/net/intel/ice/ice_rxtx_vec_common.h   |   6 +-
 drivers/net/intel/ice/ice_rxtx_vec_sse.c      | 125 ++-----
 drivers/net/intel/ixgbe/ixgbe_ethdev.c        |   8 +-
 drivers/net/intel/ixgbe/ixgbe_ipsec.c         |  10 +-
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    | 127 +------
 drivers/net/intel/ixgbe/ixgbe_rxtx.c          | 251 +++++++------
 drivers/net/intel/ixgbe/ixgbe_rxtx.h          | 103 ++----
 .../net/intel/ixgbe/ixgbe_rxtx_vec_common.h   |  15 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c | 123 ++-----
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c  | 136 ++------
 drivers/net/intel/ixgbe/meson.build           |   2 +
 53 files changed, 1960 insertions(+), 2794 deletions(-)
 create mode 100644 drivers/net/intel/common/desc.h
 copy drivers/net/intel/{i40e/i40e_recycle_mbufs_vec_common.c => common/recycle_mbufs.h} (69%)
 create mode 100644 drivers/net/intel/common/rx_vec_arm.h
 create mode 100644 drivers/net/intel/common/rx_vec_ppc.h
 create mode 100644 drivers/net/intel/common/rx_vec_x86.h
 delete mode 100644 drivers/net/intel/i40e/i40e_rxtx_common_avx.h
 delete mode 100644 drivers/net/intel/ice/ice_rxtx_common_avx.h

-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 01/25] net/ixgbe: remove unused field in Rx queue struct
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
@ 2025-05-30 13:56   ` Anatoly Burakov
  2025-05-30 13:56   ` [PATCH v4 02/25] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
                     ` (23 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:56 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin; +Cc: bruce.richardson

The `rdh` (read head) field in the `ixgbe_rx_queue` struct is not used
anywhere in the codebase, and can be removed.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
---
 drivers/net/intel/ixgbe/ixgbe_rxtx.c | 9 ++-------
 drivers/net/intel/ixgbe/ixgbe_rxtx.h | 1 -
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index 95c80ac1b8..0c07ce3186 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -3296,17 +3296,12 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	/*
 	 * Modified to setup VFRDT for Virtual Function
 	 */
-	if (ixgbe_is_vf(dev)) {
+	if (ixgbe_is_vf(dev))
 		rxq->rdt_reg_addr =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
-		rxq->rdh_reg_addr =
-			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
-	} else {
+	else
 		rxq->rdt_reg_addr =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
-		rxq->rdh_reg_addr =
-			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
-	}
 
 	rxq->rx_ring_phys_addr = rz->iova;
 	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index 641f982b01..20a5c5a0af 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -85,7 +85,6 @@ struct ixgbe_rx_queue {
 	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
 	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
 	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
-	volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
 	struct ixgbe_rx_entry *sw_ring; /**< address of RX software ring. */
 	struct ixgbe_scattered_rx_entry *sw_sc_ring; /**< address of scattered Rx software ring. */
 	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 02/25] net/iavf: make IPsec stats dynamically allocated
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
  2025-05-30 13:56   ` [PATCH v4 01/25] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
@ 2025-05-30 13:56   ` Anatoly Burakov
  2025-05-30 13:56   ` [PATCH v4 03/25] net/ixgbe: match variable names to other drivers Anatoly Burakov
                     ` (22 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:56 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin, Ian Stokes; +Cc: bruce.richardson

Currently, the stats structure is directly embedded in the queue structure.
We're about to move iavf driver to a common Rx queue structure, so we can't
have driver-specific structures that aren't pointers, inside the common
queue structure. To prepare, we replace direct embedding into the queue
structure with a pointer to the stats structure.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
---
 drivers/net/intel/iavf/iavf_ethdev.c |  2 +-
 drivers/net/intel/iavf/iavf_rxtx.c   | 21 ++++++++++++++++++---
 drivers/net/intel/iavf/iavf_rxtx.h   |  2 +-
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/net/intel/iavf/iavf_ethdev.c b/drivers/net/intel/iavf/iavf_ethdev.c
index b3dacbef84..5babd587b3 100644
--- a/drivers/net/intel/iavf/iavf_ethdev.c
+++ b/drivers/net/intel/iavf/iavf_ethdev.c
@@ -1870,7 +1870,7 @@ iavf_dev_update_ipsec_xstats(struct rte_eth_dev *ethdev,
 		struct iavf_rx_queue *rxq;
 		struct iavf_ipsec_crypto_stats *stats;
 		rxq = (struct iavf_rx_queue *)ethdev->data->rx_queues[idx];
-		stats = &rxq->stats.ipsec_crypto;
+		stats = &rxq->stats->ipsec_crypto;
 		ips->icount += stats->icount;
 		ips->ibytes += stats->ibytes;
 		ips->ierrors.count += stats->ierrors.count;
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index 5411eb6897..d23d2df807 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -619,6 +619,18 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 		return -ENOMEM;
 	}
 
+	/* Allocate stats */
+	rxq->stats = rte_zmalloc_socket("iavf rxq stats",
+				 sizeof(struct iavf_rx_queue_stats),
+				 RTE_CACHE_LINE_SIZE,
+				 socket_id);
+	if (!rxq->stats) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for "
+			     "rx queue stats");
+		rte_free(rxq);
+		return -ENOMEM;
+	}
+
 	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
 		proto_xtr = vf->proto_xtr ? vf->proto_xtr[queue_idx] :
 				IAVF_PROTO_XTR_NONE;
@@ -677,6 +689,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 				   socket_id);
 	if (!rxq->sw_ring) {
 		PMD_INIT_LOG(ERR, "Failed to allocate memory for SW ring");
+		rte_free(rxq->stats);
 		rte_free(rxq);
 		return -ENOMEM;
 	}
@@ -693,6 +706,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	if (!mz) {
 		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for RX");
 		rte_free(rxq->sw_ring);
+		rte_free(rxq->stats);
 		rte_free(rxq);
 		return -ENOMEM;
 	}
@@ -1054,6 +1068,7 @@ iavf_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 	iavf_rxq_release_mbufs_ops[q->rel_mbufs_type].release_mbufs(q);
 	rte_free(q->sw_ring);
 	rte_memzone_free(q->mz);
+	rte_free(q->stats);
 	rte_free(q);
 }
 
@@ -1581,7 +1596,7 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
 		iavf_flex_rxd_to_vlan_tci(rxm, &rxd);
 		iavf_flex_rxd_to_ipsec_crypto_status(rxm, &rxd,
-				&rxq->stats.ipsec_crypto);
+				&rxq->stats->ipsec_crypto);
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, rxm, &rxd);
 		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
@@ -1750,7 +1765,7 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
 		iavf_flex_rxd_to_vlan_tci(first_seg, &rxd);
 		iavf_flex_rxd_to_ipsec_crypto_status(first_seg, &rxd,
-				&rxq->stats.ipsec_crypto);
+				&rxq->stats->ipsec_crypto);
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, first_seg, &rxd);
 		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
@@ -2034,7 +2049,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 				rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
 			iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]);
 			iavf_flex_rxd_to_ipsec_crypto_status(mb, &rxdp[j],
-				&rxq->stats.ipsec_crypto);
+				&rxq->stats->ipsec_crypto);
 			rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);
 			stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
 			pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index 0b5d67e718..62b5a67c84 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -268,7 +268,7 @@ struct iavf_rx_queue {
 	uint8_t proto_xtr; /* protocol extraction type */
 	uint64_t xtr_ol_flag;
 		/* flexible descriptor metadata extraction offload flag */
-	struct iavf_rx_queue_stats stats;
+	struct iavf_rx_queue_stats *stats;
 	uint64_t offloads;
 	uint64_t phc_time;
 	uint64_t hw_time_update;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 03/25] net/ixgbe: match variable names to other drivers
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
  2025-05-30 13:56   ` [PATCH v4 01/25] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
  2025-05-30 13:56   ` [PATCH v4 02/25] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
@ 2025-05-30 13:56   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 04/25] net/i40e: match variable name " Anatoly Burakov
                     ` (21 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:56 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin; +Cc: bruce.richardson

Currently, the ixgbe driver has variables that have the same semantics as
in other drivers, but have different names. Rename these variables to match
ones in other drivers:

- rdt_reg_addr -> qrx_tail (Rx ring tail register address)
- rx_using_sse -> vector_rx (indicates if vectorized path is enabled)
- mb_pool -> mp (other drivers use this name for mempool)

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Make this commit separate

 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    |  2 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx.c          | 39 +++++++++----------
 drivers/net/intel/ixgbe/ixgbe_rxtx.h          |  6 +--
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c |  4 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c  |  4 +-
 5 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index c1b086ef6d..2ab7abbf4e 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -42,7 +42,7 @@ ixgbe_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 			(rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
+	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
 }
 
 uint16_t
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index 0c07ce3186..f75821029d 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -1679,7 +1679,7 @@ ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
 	/* allocate buffers in bulk directly into the S/W ring */
 	alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
 	rxep = &rxq->sw_ring[alloc_idx];
-	diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
+	diag = rte_mempool_get_bulk(rxq->mp, (void *)rxep,
 				    rxq->rx_free_thresh);
 	if (unlikely(diag != 0))
 		return -ENOMEM;
@@ -1778,8 +1778,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 		/* update tail pointer */
 		rte_wmb();
-		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
-					    cur_free_trigger);
+		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->qrx_tail, cur_free_trigger);
 	}
 
 	if (rxq->rx_tail >= rxq->nb_rx_desc)
@@ -1908,7 +1907,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   (unsigned) rx_id, (unsigned) staterr,
 			   (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
 
-		nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (nmb == NULL) {
 			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
 				   "queue_id=%u", (unsigned) rxq->port_id,
@@ -2017,7 +2016,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   (unsigned) nb_rx);
 		rx_id = (uint16_t) ((rx_id == 0) ?
 				     (rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
+		IXGBE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 		nb_hold = 0;
 	}
 	rxq->nb_rx_hold = nb_hold;
@@ -2165,7 +2164,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 			   rte_le_to_cpu_16(rxd.wb.upper.length));
 
 		if (!bulk_alloc) {
-			nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
+			nmb = rte_mbuf_raw_alloc(rxq->mp);
 			if (nmb == NULL) {
 				PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
 						  "port_id=%u queue_id=%u",
@@ -2181,7 +2180,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 			if (!ixgbe_rx_alloc_bufs(rxq, false)) {
 				rte_wmb();
 				IXGBE_PCI_REG_WC_WRITE_RELAXED(
-							rxq->rdt_reg_addr,
+							rxq->qrx_tail,
 							next_rdt);
 				nb_hold -= rxq->rx_free_thresh;
 			} else {
@@ -2347,7 +2346,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 			   rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
 
 		rte_wmb();
-		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
+		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->qrx_tail, prev_id);
 		nb_hold = 0;
 	}
 
@@ -2974,7 +2973,7 @@ ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
 	unsigned i;
 
 	/* SSE Vector driver has a different way of releasing mbufs. */
-	if (rxq->rx_using_sse) {
+	if (rxq->vector_rx) {
 		ixgbe_rx_queue_release_mbufs_vec(rxq);
 		return;
 	}
@@ -3238,7 +3237,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 				 RTE_CACHE_LINE_SIZE, socket_id);
 	if (rxq == NULL)
 		return -ENOMEM;
-	rxq->mb_pool = mp;
+	rxq->mp = mp;
 	rxq->nb_rx_desc = nb_desc;
 	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
 	rxq->queue_id = queue_idx;
@@ -3297,10 +3296,10 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	 * Modified to setup VFRDT for Virtual Function
 	 */
 	if (ixgbe_is_vf(dev))
-		rxq->rdt_reg_addr =
+		rxq->qrx_tail =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
 	else
-		rxq->rdt_reg_addr =
+		rxq->qrx_tail =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
 
 	rxq->rx_ring_phys_addr = rz->iova;
@@ -3409,7 +3408,7 @@ ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 		return -EINVAL;
 
 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
-	if (rxq->rx_using_sse)
+	if (rxq->vector_rx)
 		nb_hold = rxq->rxrearm_nb;
 	else
 #endif
@@ -4677,7 +4676,7 @@ ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
 	/* Initialize software ring entries */
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
 		volatile union ixgbe_adv_rx_desc *rxd;
-		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
+		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mp);
 
 		if (mbuf == NULL) {
 			PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
@@ -5111,7 +5110,7 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
 		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
 
-		rxq->rx_using_sse = rx_using_sse;
+		rxq->vector_rx = rx_using_sse;
 #ifdef RTE_LIB_SECURITY
 		rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
 				RTE_ETH_RX_OFFLOAD_SECURITY);
@@ -5217,7 +5216,7 @@ ixgbe_set_rsc(struct rte_eth_dev *dev)
 		 */
 
 		rscctl |= IXGBE_RSCCTL_RSCEN;
-		rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
+		rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mp);
 		psrtype |= IXGBE_PSRTYPE_TCPHDR;
 
 		/*
@@ -5374,7 +5373,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
 		 * The value is in 1 KB resolution. Valid values can be from
 		 * 1 KB to 16 KB.
 		 */
-		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
+		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
 			RTE_PKTMBUF_HEADROOM);
 		srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
 			   IXGBE_SRRCTL_BSIZEPKT_MASK);
@@ -5827,7 +5826,7 @@ ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 
 	rxq = dev->data->rx_queues[queue_id];
 
-	qinfo->mp = rxq->mb_pool;
+	qinfo->mp = rxq->mp;
 	qinfo->scattered_rx = dev->data->scattered_rx;
 	qinfo->nb_desc = rxq->nb_rx_desc;
 
@@ -5867,7 +5866,7 @@ ixgbe_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	rxq = dev->data->rx_queues[queue_id];
 
 	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
-	recycle_rxq_info->mp = rxq->mb_pool;
+	recycle_rxq_info->mp = rxq->mp;
 	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
 	recycle_rxq_info->receive_tail = &rxq->rx_tail;
 
@@ -5972,7 +5971,7 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		 * The value is in 1 KB resolution. Valid values can be from
 		 * 1 KB to 16 KB.
 		 */
-		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
+		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
 			RTE_PKTMBUF_HEADROOM);
 		srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
 			   IXGBE_SRRCTL_BSIZEPKT_MASK);
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index 20a5c5a0af..c86714804f 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -81,10 +81,10 @@ struct ixgbe_scattered_rx_entry {
  * Structure associated with each RX queue.
  */
 struct ixgbe_rx_queue {
-	struct rte_mempool  *mb_pool; /**< mbuf pool to populate RX ring. */
+	struct rte_mempool  *mp; /**< mbuf pool to populate RX ring. */
 	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
 	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
-	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
+	volatile uint32_t   *qrx_tail; /**< RDT register address. */
 	struct ixgbe_rx_entry *sw_ring; /**< address of RX software ring. */
 	struct ixgbe_scattered_rx_entry *sw_sc_ring; /**< address of scattered Rx software ring. */
 	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
@@ -96,7 +96,7 @@ struct ixgbe_rx_queue {
 	uint16_t rx_nb_avail; /**< nr of staged pkts ready to ret to app */
 	uint16_t rx_next_avail; /**< idx of next staged pkt to ret to app */
 	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	uint8_t            rx_using_sse;
+	uint8_t            vector_rx;
 	/**< indicates that vector RX is in use */
 #ifdef RTE_LIB_SECURITY
 	uint8_t            using_ipsec;
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
index 9ccd8eba25..f8916d44e8 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -27,7 +27,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mb_pool,
+	if (unlikely(rte_mempool_get_bulk(rxq->mp,
 					  (void *)rxep,
 					  RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
 		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
@@ -76,7 +76,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
+	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
 }
 
 static inline void
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
index e125f52cc5..9417e5b11f 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -29,7 +29,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mb_pool,
+	if (rte_mempool_get_bulk(rxq->mp,
 				 (void *)rxep,
 				 RTE_IXGBE_RXQ_REARM_THRESH) < 0) {
 		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
@@ -86,7 +86,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
+	IXGBE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 }
 
 #ifdef RTE_LIB_SECURITY
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 04/25] net/i40e: match variable name to other drivers
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (2 preceding siblings ...)
  2025-05-30 13:56   ` [PATCH v4 03/25] net/ixgbe: match variable names to other drivers Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 05/25] net/ice: " Anatoly Burakov
                     ` (20 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Ian Stokes, Bruce Richardson

Currently, the i40e driver has a variable that has the same semantics as
in other drivers, but has a different name. Rename `rx_using_sse` to
`vector_rx` to match it to other drivers.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Make this commit separate

 drivers/net/intel/i40e/i40e_rxtx.c             | 8 ++++----
 drivers/net/intel/i40e/i40e_rxtx.h             | 2 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c | 2 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_neon.c    | 2 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_sse.c     | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index c3ff2e05c3..b4caa3bdd5 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -2633,7 +2633,7 @@ i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
 	uint16_t i;
 
 	/* SSE Vector driver has a different way of releasing mbufs. */
-	if (rxq->rx_using_sse) {
+	if (rxq->vector_rx) {
 		i40e_rx_queue_release_mbufs_vec(rxq);
 		return;
 	}
@@ -3316,7 +3316,7 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 {
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
-	uint16_t rx_using_sse, i;
+	uint16_t vector_rx, i;
 	/* In order to allow Vector Rx there are a few configuration
 	 * conditions to be met and Rx Bulk Allocation should be allowed.
 	 */
@@ -3427,7 +3427,7 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 
 	/* Propagate information about RX function choice through all queues. */
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-		rx_using_sse =
+		vector_rx =
 			(dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec ||
 			 dev->rx_pkt_burst == i40e_recv_pkts_vec ||
 #ifdef CC_AVX512_SUPPORT
@@ -3441,7 +3441,7 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 			struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
 
 			if (rxq)
-				rxq->rx_using_sse = rx_using_sse;
+				rxq->vector_rx = vector_rx;
 		}
 	}
 }
diff --git a/drivers/net/intel/i40e/i40e_rxtx.h b/drivers/net/intel/i40e/i40e_rxtx.h
index 2f32fc5686..9db044f280 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx.h
@@ -118,7 +118,7 @@ struct i40e_rx_queue {
 	uint8_t hs_mode; /* Header Split mode */
 	bool q_set; /**< indicate if rx queue has been configured */
 	bool rx_deferred_start; /**< don't start this queue in dev start */
-	uint16_t rx_using_sse; /**<flag indicate the usage of vPMD for rx */
+	uint16_t vector_rx; /**<flag indicate the usage of vPMD for rx */
 	uint8_t dcb_tc;         /**< Traffic class of rx queue */
 	uint64_t offloads; /**< Rx offload flags of RTE_ETH_RX_OFFLOAD_* */
 	const struct rte_memzone *mz;
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
index 42beff6e89..01dee811ba 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
@@ -619,7 +619,7 @@ i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
 int __rte_cold
 i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
 {
-	rxq->rx_using_sse = 1;
+	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
 }
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index d16ceb6b5d..317a0323bb 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -746,7 +746,7 @@ i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
 int __rte_cold
 i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
 {
-	rxq->rx_using_sse = 1;
+	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
 }
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 774519265b..25a3ef7352 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -763,7 +763,7 @@ i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
 int __rte_cold
 i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
 {
-	rxq->rx_using_sse = 1;
+	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
 }
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 05/25] net/ice: match variable name to other drivers
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (3 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 04/25] net/i40e: match variable name " Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 06/25] net/i40e: rename 16-byte descriptor define Anatoly Burakov
                     ` (19 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Bruce Richardson

Currently, the ice driver has a variable that has the same semantics as
in other drivers, but has a different name. Rename `rx_ring_dma` to
`rx_ring_phys_addr` for consistency with other drivers.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Make this commit separate

 drivers/net/intel/ice/ice_dcf.c  | 2 +-
 drivers/net/intel/ice/ice_rxtx.c | 8 ++++----
 drivers/net/intel/ice/ice_rxtx.h | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/intel/ice/ice_dcf.c b/drivers/net/intel/ice/ice_dcf.c
index 65c18921f4..fa95aaaba6 100644
--- a/drivers/net/intel/ice/ice_dcf.c
+++ b/drivers/net/intel/ice/ice_dcf.c
@@ -1211,7 +1211,7 @@ ice_dcf_configure_queues(struct ice_dcf_hw *hw)
 
 		vc_qp->rxq.max_pkt_size = rxq[i]->max_pkt_len;
 		vc_qp->rxq.ring_len = rxq[i]->nb_rx_desc;
-		vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_dma;
+		vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_phys_addr;
 		vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len;
 
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index ba1435b9de..81962a1f9a 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -370,7 +370,7 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
 		rx_ctx.dtype = 0; /* No Protocol Based Buffer Split mode */
 	}
 
-	rx_ctx.base = rxq->rx_ring_dma / ICE_QUEUE_BASE_ADDR_UNIT;
+	rx_ctx.base = rxq->rx_ring_phys_addr / ICE_QUEUE_BASE_ADDR_UNIT;
 	rx_ctx.qlen = rxq->nb_rx_desc;
 	rx_ctx.dbuf = rxq->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
 	rx_ctx.hbuf = rxq->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S;
@@ -847,7 +847,7 @@ ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
 
 	memset(&rx_ctx, 0, sizeof(rx_ctx));
 
-	rx_ctx.base = rxq->rx_ring_dma / ICE_QUEUE_BASE_ADDR_UNIT;
+	rx_ctx.base = rxq->rx_ring_phys_addr / ICE_QUEUE_BASE_ADDR_UNIT;
 	rx_ctx.qlen = rxq->nb_rx_desc;
 	rx_ctx.dbuf = rxq->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
 	rx_ctx.hbuf = rxq->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S;
@@ -1273,7 +1273,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 	/* Zero all the descriptors in the ring. */
 	memset(rz->addr, 0, ring_size);
 
-	rxq->rx_ring_dma = rz->iova;
+	rxq->rx_ring_phys_addr = rz->iova;
 	rxq->rx_ring = rz->addr;
 
 	/* always reserve more for bulk alloc */
@@ -2500,7 +2500,7 @@ ice_fdir_setup_rx_resources(struct ice_pf *pf)
 	rxq->reg_idx = pf->fdir.fdir_vsi->base_queue;
 	rxq->vsi = pf->fdir.fdir_vsi;
 
-	rxq->rx_ring_dma = rz->iova;
+	rxq->rx_ring_phys_addr = rz->iova;
 	memset(rz->addr, 0, ICE_FDIR_NUM_RX_DESC *
 	       sizeof(union ice_32byte_rx_desc));
 	rxq->rx_ring = (union ice_rx_flex_desc *)rz->addr;
diff --git a/drivers/net/intel/ice/ice_rxtx.h b/drivers/net/intel/ice/ice_rxtx.h
index 500d630679..3c5c014b41 100644
--- a/drivers/net/intel/ice/ice_rxtx.h
+++ b/drivers/net/intel/ice/ice_rxtx.h
@@ -93,7 +93,7 @@ enum ice_rx_dtype {
 struct ice_rx_queue {
 	struct rte_mempool *mp; /* mbuf pool to populate RX ring */
 	volatile union ice_rx_flex_desc *rx_ring;/* RX ring virtual address */
-	rte_iova_t rx_ring_dma; /* RX ring DMA address */
+	rte_iova_t rx_ring_phys_addr; /* RX ring DMA address */
 	struct ice_rx_entry *sw_ring; /* address of RX soft ring */
 	uint16_t nb_rx_desc; /* number of RX descriptors */
 	uint16_t rx_free_thresh; /* max free RX desc to hold */
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 06/25] net/i40e: rename 16-byte descriptor define
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (4 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 05/25] net/ice: " Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 07/25] net/ice: " Anatoly Burakov
                     ` (18 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Aman Singh, Bruce Richardson, Ian Stokes

In preparation for having a common definition for 16-byte and 32-byte Rx
descriptors, rename `RTE_LIBRTE_I40E_16BYTE_RX_DESC` to
`RTE_NET_INTEL_USE_16BYTE_DESC`.
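
Condensed from the hunks below, the shared define is meant to be used as
follows (the rte_config.h line is shown commented out, matching the
default of 32-byte descriptors):

    /* config/rte_config.h: uncomment to switch to 16-byte descriptors */
    /* #define RTE_NET_INTEL_USE_16BYTE_DESC 1 */

    /* drivers/net/intel/i40e/i40e_rxtx.h: the define picks the HW
     * descriptor layout used throughout the driver
     */
    #ifdef RTE_NET_INTEL_USE_16BYTE_DESC
    #define i40e_rx_desc i40e_16byte_rx_desc
    #else
    #define i40e_rx_desc i40e_32byte_rx_desc
    #endif

Since the name is no longer i40e-specific, enabling it affects every
Intel PMD that honours the define, which is what the added documentation
note below calls out.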

Suggested-by: Bruce Richardson <bruce.richardson@intel.com>
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Add this commit

 app/test-pmd/config.c                         |  4 ++--
 config/rte_config.h                           |  2 +-
 doc/guides/nics/i40e.rst                      |  4 +++-
 drivers/net/intel/i40e/i40e_fdir.c            |  2 +-
 drivers/net/intel/i40e/i40e_pf.c              |  2 +-
 drivers/net/intel/i40e/i40e_rxtx.c            | 10 +++++-----
 drivers/net/intel/i40e/i40e_rxtx.h            |  2 +-
 drivers/net/intel/i40e/i40e_rxtx_common_avx.h |  2 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |  8 ++++----
 drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |  8 ++++----
 drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   |  4 ++--
 drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    |  4 ++--
 12 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index e89af21cec..1f43f3bbd8 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -4481,7 +4481,7 @@ ring_rxd_display_dword(union igb_ring_dword dword)
 
 static void
 ring_rx_descriptor_display(const struct rte_memzone *ring_mz,
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 			   portid_t port_id,
 #else
 			   __rte_unused portid_t port_id,
@@ -4490,7 +4490,7 @@ ring_rx_descriptor_display(const struct rte_memzone *ring_mz,
 {
 	struct igb_ring_desc_16_bytes *ring =
 		(struct igb_ring_desc_16_bytes *)ring_mz->addr;
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	int ret;
 	struct rte_eth_dev_info dev_info;
 
diff --git a/config/rte_config.h b/config/rte_config.h
index 86897de75e..6191ba3ae0 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -137,7 +137,7 @@
 
 /* i40e defines */
 #define RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC 1
-// RTE_LIBRTE_I40E_16BYTE_RX_DESC is not set
+/* RTE_NET_INTEL_USE_16BYTE_DESC is not set */
 #define RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF 64
 #define RTE_LIBRTE_I40E_QUEUE_NUM_PER_VF 4
 #define RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM 4
diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
index ba592d23fe..234757cd7a 100644
--- a/doc/guides/nics/i40e.rst
+++ b/doc/guides/nics/i40e.rst
@@ -961,7 +961,9 @@ Use 16 Bytes RX Descriptor Size
 As i40e PMD supports both 16 and 32 bytes RX descriptor sizes, and 16 bytes size can provide helps to high performance of small packets.
 In ``config/rte_config.h`` set the following to use 16 bytes size RX descriptors::
 
-   #define RTE_LIBRTE_I40E_16BYTE_RX_DESC 1
+   #define RTE_NET_INTEL_USE_16BYTE_DESC 1
+
+Note, however, that enabling this option will cause all PMDs supporting this definition to use 16-byte descriptors.
 
 Input set requirement of each pctype for FDIR
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/drivers/net/intel/i40e/i40e_fdir.c b/drivers/net/intel/i40e/i40e_fdir.c
index 94e3ab44e3..734218b67d 100644
--- a/drivers/net/intel/i40e/i40e_fdir.c
+++ b/drivers/net/intel/i40e/i40e_fdir.c
@@ -112,7 +112,7 @@ i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
 	rx_ctx.hbuff = 0;
 	rx_ctx.base = rxq->rx_ring_phys_addr / I40E_QUEUE_BASE_ADDR_UNIT;
 	rx_ctx.qlen = rxq->nb_rx_desc;
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	rx_ctx.dsize = 1;
 #endif
 	rx_ctx.dtype = i40e_header_split_none;
diff --git a/drivers/net/intel/i40e/i40e_pf.c b/drivers/net/intel/i40e/i40e_pf.c
index 4a47a8f7ee..ebe1deeade 100644
--- a/drivers/net/intel/i40e/i40e_pf.c
+++ b/drivers/net/intel/i40e/i40e_pf.c
@@ -401,7 +401,7 @@ i40e_pf_host_hmc_config_rxq(struct i40e_hw *hw,
 	rx_ctx.hbuff = rxq->hdr_size >> I40E_RXQ_CTX_HBUFF_SHIFT;
 	rx_ctx.base = rxq->dma_ring_addr / I40E_QUEUE_BASE_ADDR_UNIT;
 	rx_ctx.qlen = rxq->ring_len;
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	rx_ctx.dsize = 1;
 #endif
 
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index b4caa3bdd5..5f54bcc225 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -125,7 +125,7 @@ i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
 	} else {
 		mb->vlan_tci = 0;
 	}
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	if (rte_le_to_cpu_16(rxdp->wb.qword2.ext_status) &
 		(1 << I40E_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) {
 		mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED | RTE_MBUF_F_RX_QINQ |
@@ -217,7 +217,7 @@ static inline uint64_t
 i40e_rxd_build_fdir(volatile union i40e_rx_desc *rxdp, struct rte_mbuf *mb)
 {
 	uint64_t flags = 0;
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	uint16_t flexbh, flexbl;
 
 	flexbh = (rte_le_to_cpu_32(rxdp->wb.qword2.ext_status) >>
@@ -2925,10 +2925,10 @@ i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
 		rxd = &rxq->rx_ring[i];
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		rxd->read.rsvd1 = 0;
 		rxd->read.rsvd2 = 0;
-#endif /* RTE_LIBRTE_I40E_16BYTE_RX_DESC */
+#endif /* RTE_NET_INTEL_USE_16BYTE_DESC */
 
 		rxe[i].mbuf = mbuf;
 	}
@@ -3010,7 +3010,7 @@ i40e_rx_queue_init(struct i40e_rx_queue *rxq)
 
 	rx_ctx.base = rxq->rx_ring_phys_addr / I40E_QUEUE_BASE_ADDR_UNIT;
 	rx_ctx.qlen = rxq->nb_rx_desc;
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	rx_ctx.dsize = 1;
 #endif
 	rx_ctx.dtype = rxq->hs_mode;
diff --git a/drivers/net/intel/i40e/i40e_rxtx.h b/drivers/net/intel/i40e/i40e_rxtx.h
index 9db044f280..568f0536ac 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx.h
@@ -68,7 +68,7 @@ enum i40e_header_split_mode {
 			       I40E_HEADER_SPLIT_SCTP)
 
 /* HW desc structure, both 16-byte and 32-byte types are supported */
-#ifdef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
 #define i40e_rx_desc i40e_16byte_rx_desc
 #else
 #define i40e_rx_desc i40e_32byte_rx_desc
diff --git a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
index b66a808f9f..7d2bda624b 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
@@ -41,7 +41,7 @@ i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
 		return;
 	}
 
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	struct rte_mbuf *mb0, *mb1;
 	__m128i dma_addr0, dma_addr1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index 9c406e7a6f..4469c73c56 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -21,7 +21,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	i40e_rxq_rearm_common(rxq, false);
 }
 
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 /* Handles 32B descriptor FDIR ID processing:
  * rxdp: receive descriptor ring, required to load 2nd 16B half of each desc
  * rx_pkts: required to store metadata back to mbufs
@@ -99,7 +99,7 @@ desc_fdir_processing_32b(volatile union i40e_rx_desc *rxdp,
 	/* NOT REACHED, see above switch returns */
 	return _mm256_setzero_si256();
 }
-#endif /* RTE_LIBRTE_I40E_16BYTE_RX_DESC */
+#endif /* RTE_NET_INTEL_USE_16BYTE_DESC */
 
 #define PKTLEN_SHIFT     10
 
@@ -398,7 +398,7 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		 * not always performed. Branch over the code when not enabled.
 		 */
 		if (rxq->fdir_enabled) {
-#ifdef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
 			/* 16B descriptor code path:
 			 * RSS and FDIR ID use the same offset in the desc, so
 			 * only one can be present at a time. The code below
@@ -490,7 +490,7 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			fdir_add_flags = desc_fdir_processing_32b(rxdp, rx_pkts, i, 6);
 			mbuf_flags = _mm256_or_si256(mbuf_flags, fdir_add_flags);
 			/* End 32B desc handling */
-#endif /* RTE_LIBRTE_I40E_16BYTE_RX_DESC */
+#endif /* RTE_NET_INTEL_USE_16BYTE_DESC */
 
 		} /* if() on FDIR enabled */
 
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index d8244556c0..bb25acf398 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -23,7 +23,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	i40e_rxq_rearm_common(rxq, true);
 }
 
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 /* Handles 32B descriptor FDIR ID processing:
  * rxdp: receive descriptor ring, required to load 2nd 16B half of each desc
  * rx_pkts: required to store metadata back to mbufs
@@ -102,7 +102,7 @@ desc_fdir_processing_32b(volatile union i40e_rx_desc *rxdp,
 	/* NOT REACHED, see above switch returns */
 	return _mm256_setzero_si256();
 }
-#endif /* RTE_LIBRTE_I40E_16BYTE_RX_DESC */
+#endif /* RTE_NET_INTEL_USE_16BYTE_DESC */
 
 #define PKTLEN_SHIFT     10
 
@@ -419,7 +419,7 @@ _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		 * not always performed. Branch over the code when not enabled.
 		 */
 		if (rxq->fdir_enabled) {
-#ifdef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
 			/* 16B descriptor code path:
 			 * RSS and FDIR ID use the same offset in the desc, so
 			 * only one can be present at a time. The code below
@@ -539,7 +539,7 @@ _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			mbuf_flags =
 				_mm256_or_si256(mbuf_flags, fdir_add_flags);
 			/* End 32B desc handling */
-#endif /* RTE_LIBRTE_I40E_16BYTE_RX_DESC */
+#endif /* RTE_NET_INTEL_USE_16BYTE_DESC */
 
 		} /* if() on FDIR enabled */
 
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index 317a0323bb..695b4e1040 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -77,7 +77,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
 }
 
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 /* NEON version of FDIR mark extraction for 4 32B descriptors at a time */
 static inline uint32x4_t
 descs_to_fdir_32b(volatile union i40e_rx_desc *rxdp, struct rte_mbuf **rx_pkt)
@@ -284,7 +284,7 @@ desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
 
 	/* Extract FDIR ID only if FDIR is enabled to avoid useless work */
 	if (rxq->fdir_enabled) {
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		uint32x4_t v_fdir_ol_flags = descs_to_fdir_32b(rxdp, rx_pkts);
 #else
 		(void)rxdp; /* rxdp not required for 16B desc mode */
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 25a3ef7352..920089fe3e 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -86,7 +86,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 }
 
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 /* SSE version of FDIR mark extraction for 4 32B descriptors at a time */
 static inline __m128i
 descs_to_fdir_32b(volatile union i40e_rx_desc *rxdp, struct rte_mbuf **rx_pkt)
@@ -285,7 +285,7 @@ desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
 
 	/* Extract FDIR ID only if FDIR is enabled to avoid useless work */
 	if (rxq->fdir_enabled) {
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		__m128i v_fdir_ol_flags = descs_to_fdir_32b(rxdp, rx_pkts);
 #else
 		(void)rxdp; /* rxdp not required for 16B desc mode */
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 07/25] net/ice: rename 16-byte descriptor define
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (5 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 06/25] net/i40e: rename 16-byte descriptor define Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 08/25] net/iavf: " Anatoly Burakov
                     ` (17 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Bruce Richardson

In preparation for having a common definition for 16-byte and 32-byte Rx
descriptors, rename RTE_LIBRTE_ICE_16BYTE_RX_DESC to
RTE_NET_INTEL_USE_16BYTE_DESC.

Suggested-by: Bruce Richardson <bruce.richardson@intel.com>
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Add this commit

 drivers/net/intel/ice/ice_dcf.c             |  2 +-
 drivers/net/intel/ice/ice_dcf_ethdev.c      |  2 +-
 drivers/net/intel/ice/ice_rxtx.c            | 30 ++++++++++-----------
 drivers/net/intel/ice/ice_rxtx.h            |  2 +-
 drivers/net/intel/ice/ice_rxtx_common_avx.h |  2 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx2.c   |  2 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx512.c |  2 +-
 drivers/net/intel/ice/ice_rxtx_vec_sse.c    |  2 +-
 8 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/net/intel/ice/ice_dcf.c b/drivers/net/intel/ice/ice_dcf.c
index fa95aaaba6..2f7c239491 100644
--- a/drivers/net/intel/ice/ice_dcf.c
+++ b/drivers/net/intel/ice/ice_dcf.c
@@ -1214,7 +1214,7 @@ ice_dcf_configure_queues(struct ice_dcf_hw *hw)
 		vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_phys_addr;
 		vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len;
 
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		if (hw->vf_res->vf_cap_flags &
 		    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
 		    hw->supported_rxdid &
diff --git a/drivers/net/intel/ice/ice_dcf_ethdev.c b/drivers/net/intel/ice/ice_dcf_ethdev.c
index efff76afa8..d3fd5d7122 100644
--- a/drivers/net/intel/ice/ice_dcf_ethdev.c
+++ b/drivers/net/intel/ice/ice_dcf_ethdev.c
@@ -308,7 +308,7 @@ alloc_rxq_mbufs(struct ice_rx_queue *rxq)
 		rxd = &rxq->rx_ring[i];
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		rxd->read.rsvd1 = 0;
 		rxd->read.rsvd2 = 0;
 #endif
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 81962a1f9a..19569b6a38 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -86,7 +86,7 @@ ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ice_rx_queue *rxq,
 		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
 	}
 
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	if (desc->flow_id != 0xFFFFFFFF) {
 		mb->ol_flags |= RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
 		mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
@@ -101,7 +101,7 @@ ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ice_rx_queue *rxq,
 {
 	volatile struct ice_32b_rx_flex_desc_comms_ovs *desc =
 			(volatile struct ice_32b_rx_flex_desc_comms_ovs *)rxdp;
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	uint16_t stat_err;
 #endif
 
@@ -110,7 +110,7 @@ ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ice_rx_queue *rxq,
 		mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
 	}
 
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	stat_err = rte_le_to_cpu_16(desc->status_error0);
 	if (likely(stat_err & (1 << ICE_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
 		mb->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
@@ -134,7 +134,7 @@ ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ice_rx_queue *rxq,
 		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
 	}
 
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	if (desc->flow_id != 0xFFFFFFFF) {
 		mb->ol_flags |= RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
 		mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
@@ -178,7 +178,7 @@ ice_rxd_to_pkt_fields_by_comms_aux_v2(struct ice_rx_queue *rxq,
 		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
 	}
 
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	if (desc->flow_id != 0xFFFFFFFF) {
 		mb->ol_flags |= RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
 		mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
@@ -374,7 +374,7 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
 	rx_ctx.qlen = rxq->nb_rx_desc;
 	rx_ctx.dbuf = rxq->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
 	rx_ctx.hbuf = rxq->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S;
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	rx_ctx.dsize = 1; /* 32B descriptors */
 #endif
 	rx_ctx.rxmax = rxq->max_pkt_len;
@@ -501,7 +501,7 @@ ice_alloc_rx_queue_mbufs(struct ice_rx_queue *rxq)
 			rxd->read.pkt_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf_pay));
 		}
 
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		rxd->read.rsvd1 = 0;
 		rxd->read.rsvd2 = 0;
 #endif
@@ -1668,7 +1668,7 @@ ice_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union ice_rx_flex_desc *rxdp)
 		mb->vlan_tci = 0;
 	}
 
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	if (rte_le_to_cpu_16(rxdp->wb.status_error1) &
 	    (1 << ICE_RX_FLEX_DESC_STATUS1_L2TAG2P_S)) {
 		mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED | RTE_MBUF_F_RX_QINQ |
@@ -1705,7 +1705,7 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 	int32_t i, j, nb_rx = 0;
 	uint64_t pkt_flags = 0;
 	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
 	struct ice_vsi *vsi = rxq->vsi;
@@ -1721,7 +1721,7 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 	if (!(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)))
 		return 0;
 
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
 
@@ -1783,7 +1783,7 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 				rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
 			ice_rxd_to_vlan_tci(mb, &rxdp[j]);
 			rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 			if (rxq->ts_flag > 0 &&
 			    (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)) {
 				rxq->time_high =
@@ -2023,7 +2023,7 @@ ice_recv_scattered_pkts(void *rx_queue,
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
 	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
 	struct ice_vsi *vsi = rxq->vsi;
@@ -2151,7 +2151,7 @@ ice_recv_scattered_pkts(void *rx_queue,
 		ice_rxd_to_vlan_tci(first_seg, &rxd);
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, first_seg, &rxd);
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		if (rxq->ts_flag > 0 &&
 		    (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)) {
 			rxq->time_high =
@@ -2540,7 +2540,7 @@ ice_recv_pkts(void *rx_queue,
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
 	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
 	struct ice_vsi *vsi = rxq->vsi;
@@ -2649,7 +2649,7 @@ ice_recv_pkts(void *rx_queue,
 		ice_rxd_to_vlan_tci(rxm, &rxd);
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, rxm, &rxd);
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		if (rxq->ts_flag > 0 &&
 		    (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)) {
 			rxq->time_high =
diff --git a/drivers/net/intel/ice/ice_rxtx.h b/drivers/net/intel/ice/ice_rxtx.h
index 3c5c014b41..d2d521c4f5 100644
--- a/drivers/net/intel/ice/ice_rxtx.h
+++ b/drivers/net/intel/ice/ice_rxtx.h
@@ -23,7 +23,7 @@
 #define ICE_CHK_Q_ENA_COUNT        100
 #define ICE_CHK_Q_ENA_INTERVAL_US  100
 
-#ifdef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
 #define ice_rx_flex_desc ice_16b_rx_flex_desc
 #else
 #define ice_rx_flex_desc ice_32b_rx_flex_desc
diff --git a/drivers/net/intel/ice/ice_rxtx_common_avx.h b/drivers/net/intel/ice/ice_rxtx_common_avx.h
index c62e60c70e..a68cf8512d 100644
--- a/drivers/net/intel/ice/ice_rxtx_common_avx.h
+++ b/drivers/net/intel/ice/ice_rxtx_common_avx.h
@@ -38,7 +38,7 @@ ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
 		return;
 	}
 
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	struct rte_mbuf *mb0, *mb1;
 	__m128i dma_addr0, dma_addr1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
index 0c54b325c6..6fe5ffa6f4 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
@@ -440,7 +440,7 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		} /* if() on fdir_enabled */
 
 		if (offload) {
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 			/**
 			 * needs to load 2nd 16B of each desc for RSS hash parsing,
 			 * will cause performance drop to get into this context.
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
index bd49be07c9..490d1ae059 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
@@ -462,7 +462,7 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
 		} /* if() on fdir_enabled */
 
 		if (do_offload) {
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 			/**
 			 * needs to load 2nd 16B of each desc for RSS hash parsing,
 			 * will cause performance drop to get into this context.
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_sse.c b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
index 97f05ba45e..719b37645e 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
@@ -477,7 +477,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
 		pkt_mb0 = _mm_add_epi16(pkt_mb0, crc_adjust);
 
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		/**
 		 * needs to load 2nd 16B of each desc for RSS hash parsing,
 		 * will cause performance drop to get into this context.
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 08/25] net/iavf: rename 16-byte descriptor define
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (6 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 07/25] net/ice: " Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 09/25] net/ixgbe: simplify vector PMD compilation Anatoly Burakov
                     ` (16 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin, Ian Stokes; +Cc: bruce.richardson

In preparation for having a common definition for 16-byte and 32-byte Rx
descriptors, rename RTE_LIBRTE_IAVF_16BYTE_RX_DESC to
RTE_NET_INTEL_USE_16BYTE_DESC.
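
As in the i40e and ice patches, the renamed define guards the descriptor
type aliases; for iavf it selects both the legacy and the flex descriptor
names. A condensed view of the iavf_rxtx.h hunk below (the 32-byte branch
is left out here, as it is untouched by this patch):

    #ifdef RTE_NET_INTEL_USE_16BYTE_DESC
    #define iavf_rx_desc iavf_16byte_rx_desc
    #define iavf_rx_flex_desc iavf_16b_rx_flex_desc
    #else
    /* ... 32-byte descriptor aliases, unchanged ... */
    #endif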

Suggested-by: Bruce Richardson <bruce.richardson@intel.com>
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Add this commit

 drivers/net/intel/iavf/iavf_rxtx.c            | 14 +++++++-------
 drivers/net/intel/iavf/iavf_rxtx.h            |  4 ++--
 drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   | 10 +++++-----
 drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c | 10 +++++-----
 drivers/net/intel/iavf/iavf_rxtx_vec_common.h |  2 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    | 18 +++++++++---------
 drivers/net/intel/iavf/iavf_vchnl.c           |  2 +-
 7 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index d23d2df807..fd6c7d3a3e 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -345,7 +345,7 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		rxd = &rxq->rx_ring[i];
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		rxd->read.rsvd1 = 0;
 		rxd->read.rsvd2 = 0;
 #endif
@@ -401,7 +401,7 @@ iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct iavf_rx_queue *rxq,
 {
 	volatile struct iavf_32b_rx_flex_desc_comms_ovs *desc =
 			(volatile struct iavf_32b_rx_flex_desc_comms_ovs *)rxdp;
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	uint16_t stat_err;
 #endif
 
@@ -410,7 +410,7 @@ iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct iavf_rx_queue *rxq,
 		mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
 	}
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	stat_err = rte_le_to_cpu_16(desc->status_error0);
 	if (likely(stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
 		mb->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
@@ -434,7 +434,7 @@ iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct iavf_rx_queue *rxq,
 		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
 	}
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	if (desc->flow_id != 0xFFFFFFFF) {
 		mb->ol_flags |= RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
 		mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
@@ -476,7 +476,7 @@ iavf_rxd_to_pkt_fields_by_comms_aux_v2(struct iavf_rx_queue *rxq,
 		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
 	}
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	if (desc->flow_id != 0xFFFFFFFF) {
 		mb->ol_flags |= RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
 		mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
@@ -1177,7 +1177,7 @@ iavf_flex_rxd_to_vlan_tci(struct rte_mbuf *mb,
 		mb->vlan_tci = 0;
 	}
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	if (rte_le_to_cpu_16(rxdp->wb.status_error1) &
 	    (1 << IAVF_RX_FLEX_DESC_STATUS1_L2TAG2P_S)) {
 		mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED |
@@ -1301,7 +1301,7 @@ static inline uint64_t
 iavf_rxd_build_fdir(volatile union iavf_rx_desc *rxdp, struct rte_mbuf *mb)
 {
 	uint64_t flags = 0;
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	uint16_t flexbh;
 
 	flexbh = (rte_le_to_cpu_32(rxdp->wb.qword2.ext_status) >>
diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index 62b5a67c84..6198643605 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -195,7 +195,7 @@ union iavf_32b_rx_flex_desc {
 };
 
 /* HW desc structure, both 16-byte and 32-byte types are supported */
-#ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
 #define iavf_rx_desc iavf_16byte_rx_desc
 #define iavf_rx_flex_desc iavf_16b_rx_flex_desc
 #else
@@ -740,7 +740,7 @@ void iavf_dump_rx_descriptor(struct iavf_rx_queue *rxq,
 			    const volatile void *desc,
 			    uint16_t rx_id)
 {
-#ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
 	const volatile union iavf_16byte_rx_desc *rx_desc = desc;
 
 	printf("Queue %d Rx_desc %d: QW0: 0x%016"PRIx64" QW1: 0x%016"PRIx64"\n",
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
index 88e35dc3e9..d94a8b0ae1 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
@@ -496,7 +496,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 
 	struct iavf_adapter *adapter = rxq->vsi->adapter;
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
 #endif
 	const uint32_t *type_table = adapter->ptype_tbl;
@@ -524,7 +524,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 	if (!(rxdp->wb.status_error0 &
 			rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
 		return 0;
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	bool is_tsinit = false;
 	uint8_t inflection_point = 0;
 	__m256i hw_low_last = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, rxq->phc_time);
@@ -946,7 +946,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 		} /* if() on fdir_enabled */
 
 		if (offload) {
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 			/**
 			 * needs to load 2nd 16B of each desc,
 			 * will cause performance drop to get into this context.
@@ -1360,7 +1360,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 				(_mm_cvtsi128_si64
 					(_mm256_castsi256_si128(status0_7)));
 		received += burst;
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 			inflection_point = (inflection_point <= burst) ? inflection_point : 0;
 			switch (inflection_point) {
@@ -1411,7 +1411,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 			break;
 	}
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	if (received > 0 && (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP))
 		rxq->phc_time = *RTE_MBUF_DYNFIELD(rx_pkts[received - 1], iavf_timestamp_dynfield_offset, rte_mbuf_timestamp_t *);
 #endif
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
index f2af028bef..895b8717f7 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
@@ -585,7 +585,7 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 					bool offload)
 {
 	struct iavf_adapter *adapter = rxq->vsi->adapter;
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
 #endif
 #ifdef IAVF_RX_PTYPE_OFFLOAD
@@ -616,7 +616,7 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 	      rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
 		return 0;
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 #ifdef IAVF_RX_TS_OFFLOAD
 	uint8_t inflection_point = 0;
 	bool is_tsinit = false;
@@ -1096,7 +1096,7 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 		__m256i mb0_1 = _mm512_extracti64x4_epi64(mb0_3, 0);
 		__m256i mb2_3 = _mm512_extracti64x4_epi64(mb0_3, 1);
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		if (offload) {
 #if defined(IAVF_RX_RSS_OFFLOAD) || defined(IAVF_RX_TS_OFFLOAD)
 			/**
@@ -1548,7 +1548,7 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 				(_mm_cvtsi128_si64
 					(_mm256_castsi256_si128(status0_7)));
 		received += burst;
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 #ifdef IAVF_RX_TS_OFFLOAD
 		if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 			inflection_point = (inflection_point <= burst) ? inflection_point : 0;
@@ -1601,7 +1601,7 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 			break;
 	}
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 #ifdef IAVF_RX_TS_OFFLOAD
 	if (received > 0 && (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP))
 		rxq->phc_time = *RTE_MBUF_DYNFIELD(rx_pkts[received - 1],
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
index 38e9a206d9..f577fd7f3e 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
@@ -269,7 +269,7 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 		return;
 	}
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	struct rte_mbuf *mb0, *mb1;
 	__m128i dma_addr0, dma_addr1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
index 2e41079e88..8ccdec7f8a 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
@@ -204,7 +204,7 @@ flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 	return fdir_flags;
 }
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 static inline void
 flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4], __m128i descs_bh[4],
 		       struct rte_mbuf **rx_pkts)
@@ -325,7 +325,7 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	/* merge the flags */
 	flags = _mm_or_si128(flags, rss_vlan);
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	if (rxq->rx_flags & IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2) {
 		const __m128i l2tag2_mask =
 			_mm_set1_epi32(1 << IAVF_RX_FLEX_DESC_STATUS1_L2TAG2P_S);
@@ -388,7 +388,7 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 			_mm_extract_epi32(fdir_id0_3, 3);
 	} /* if() on fdir_enabled */
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
 		flags = _mm_or_si128(flags, _mm_set1_epi32(iavf_timestamp_dynflag));
 #endif
@@ -724,7 +724,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	int pos;
 	uint64_t var;
 	struct iavf_adapter *adapter = rxq->vsi->adapter;
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
 #endif
 	const uint32_t *ptype_tbl = adapter->ptype_tbl;
@@ -796,7 +796,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	      rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
 		return 0;
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	uint8_t inflection_point = 0;
 	bool is_tsinit = false;
 	__m128i hw_low_last = _mm_set_epi32(0, 0, 0, (uint32_t)rxq->phc_time);
@@ -845,7 +845,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	     pos += IAVF_VPMD_DESCS_PER_LOOP,
 	     rxdp += IAVF_VPMD_DESCS_PER_LOOP) {
 		__m128i descs[IAVF_VPMD_DESCS_PER_LOOP];
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		__m128i descs_bh[IAVF_VPMD_DESCS_PER_LOOP] = {_mm_setzero_si128()};
 #endif
 		__m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
@@ -914,7 +914,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
 		pkt_mb0 = _mm_add_epi16(pkt_mb0, crc_adjust);
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		/**
 		 * needs to load 2nd 16B of each desc,
 		 * will cause performance drop to get into this context.
@@ -1121,7 +1121,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		var = rte_popcount64(_mm_cvtsi128_si64(staterr));
 		nb_pkts_recd += var;
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 			inflection_point = (inflection_point <= var) ? inflection_point : 0;
 			switch (inflection_point) {
@@ -1157,7 +1157,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 			break;
 	}
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 #ifdef IAVF_RX_TS_OFFLOAD
 	if (nb_pkts_recd > 0 && (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP))
 		rxq->phc_time = *RTE_MBUF_DYNFIELD(rx_pkts[nb_pkts_recd - 1],
diff --git a/drivers/net/intel/iavf/iavf_vchnl.c b/drivers/net/intel/iavf/iavf_vchnl.c
index 6feca8435e..da1ef5900f 100644
--- a/drivers/net/intel/iavf/iavf_vchnl.c
+++ b/drivers/net/intel/iavf/iavf_vchnl.c
@@ -1260,7 +1260,7 @@ iavf_configure_queues(struct iavf_adapter *adapter,
 		vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_phys_addr;
 		vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len;
 		vc_qp->rxq.crc_disable = rxq[i]->crc_len != 0 ? 1 : 0;
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 		if (vf->vf_res->vf_cap_flags &
 		    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
 			if (vf->supported_rxdid & RTE_BIT64(rxq[i]->rxdid)) {
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 09/25] net/ixgbe: simplify vector PMD compilation
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (7 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 08/25] net/iavf: " Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 10/25] net/ixgbe: replace always-true check Anatoly Burakov
                     ` (15 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin; +Cc: bruce.richardson

Currently, various pieces of vector PMD-related code are conditionally
compiled depending on the architecture the code is being built for.

Simplify this by removing the per-architecture #ifdefs and controlling
vector PMD compilation in one place: a compile flag set from the build
file.
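
As a minimal sketch of the resulting pattern (the flag name and the stub
are taken from the changes below; only one of the stubs is shown):

    /* set per-architecture in meson.build: cflags += ['-DIXGBE_VPMD_SUPPORTED'] */

    /* ixgbe_rxtx.c: stubs are compiled only when no vector PMD is built */
    #ifndef IXGBE_VPMD_SUPPORTED
    void
    ixgbe_recycle_rx_descriptors_refill_vec(void __rte_unused *rx_queue,
            uint16_t __rte_unused nb_mbufs)
    {
    }
    #endif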

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Add this commit

 drivers/net/intel/ixgbe/ixgbe_rxtx.c | 27 +++++++++++++++------------
 drivers/net/intel/ixgbe/ixgbe_rxtx.h |  4 ----
 drivers/net/intel/ixgbe/meson.build  |  2 ++
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index f75821029d..f5fd50584a 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -2678,9 +2678,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ci_tx_queue *txq)
 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
 					ixgbe_txq_vec_setup(txq) == 0)) {
 			PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
 			dev->recycle_tx_mbufs_reuse = ixgbe_recycle_tx_mbufs_reuse_vec;
-#endif
 			dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
 		} else
 		dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
@@ -5049,10 +5047,8 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 			PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
 					    "callback (port=%d).",
 				     dev->data->port_id);
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
 			dev->recycle_rx_descriptors_refill =
 				ixgbe_recycle_rx_descriptors_refill_vec;
-#endif
 			dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
 		} else if (adapter->rx_bulk_alloc_allowed) {
 			PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
@@ -5081,9 +5077,7 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 				    "burst size no less than %d (port=%d).",
 			     RTE_IXGBE_DESCS_PER_LOOP,
 			     dev->data->port_id);
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
 		dev->recycle_rx_descriptors_refill = ixgbe_recycle_rx_descriptors_refill_vec;
-#endif
 		dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
 	} else if (adapter->rx_bulk_alloc_allowed) {
 		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
@@ -5871,10 +5865,8 @@ ixgbe_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	recycle_rxq_info->receive_tail = &rxq->rx_tail;
 
 	if (adapter->rx_vec_allowed) {
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
 		recycle_rxq_info->refill_requirement = RTE_IXGBE_RXQ_REARM_THRESH;
 		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
-#endif
 	} else {
 		recycle_rxq_info->refill_requirement = rxq->rx_free_thresh;
 		recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
@@ -6239,11 +6231,9 @@ ixgbe_config_rss_filter(struct rte_eth_dev *dev,
 	return 0;
 }
 
-/* Stubs needed for linkage when RTE_ARCH_PPC_64, RTE_ARCH_RISCV or
- * RTE_ARCH_LOONGARCH is set.
+/* Stubs needed for linkage when vectorized PMD isn't supported.
  */
-#if defined(RTE_ARCH_PPC_64) || defined(RTE_ARCH_RISCV) || \
-	defined(RTE_ARCH_LOONGARCH)
+#ifndef IXGBE_VPMD_SUPPORTED
 int
 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
 {
@@ -6268,6 +6258,12 @@ ixgbe_recv_scattered_pkts_vec(
 	return 0;
 }
 
+void
+ixgbe_recycle_rx_descriptors_refill_vec(void __rte_unused * rx_queue,
+		uint16_t __rte_unused nb_mbufs)
+{
+}
+
 int
 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
 {
@@ -6282,6 +6278,13 @@ ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
 	return 0;
 }
 
+uint16_t
+ixgbe_recycle_tx_mbufs_reuse_vec(void __rte_unused * tx_queue,
+		struct rte_eth_recycle_rxq_info __rte_unused * recycle_rxq_info)
+{
+	return 0;
+}
+
 int
 ixgbe_txq_vec_setup(struct ci_tx_queue *txq __rte_unused)
 {
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index c86714804f..bcd5db87e8 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -35,10 +35,8 @@
 
 #define RTE_IXGBE_DESCS_PER_LOOP    4
 
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
 #define RTE_IXGBE_RXQ_REARM_THRESH      32
 #define RTE_IXGBE_MAX_RX_BURST          RTE_IXGBE_RXQ_REARM_THRESH
-#endif
 
 #define RX_RING_SZ ((IXGBE_MAX_RING_DESC + RTE_PMD_IXGBE_RX_MAX_BURST) * \
 		    sizeof(union ixgbe_adv_rx_desc))
@@ -102,10 +100,8 @@ struct ixgbe_rx_queue {
 	uint8_t            using_ipsec;
 	/**< indicates that IPsec RX feature is in use */
 #endif
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
 	uint16_t            rxrearm_nb;     /**< number of remaining to be re-armed */
 	uint16_t            rxrearm_start;  /**< the idx we start the re-arming from */
-#endif
 	uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
 	uint16_t            queue_id; /**< RX queue index. */
 	uint16_t            reg_idx;  /**< RX queue register index. */
diff --git a/drivers/net/intel/ixgbe/meson.build b/drivers/net/intel/ixgbe/meson.build
index d1122bb9cd..8adb1567d1 100644
--- a/drivers/net/intel/ixgbe/meson.build
+++ b/drivers/net/intel/ixgbe/meson.build
@@ -26,9 +26,11 @@ deps += ['hash', 'security']
 if arch_subdir == 'x86'
     sources += files('ixgbe_rxtx_vec_sse.c')
     sources += files('ixgbe_recycle_mbufs_vec_common.c')
+    cflags += ['-DIXGBE_VPMD_SUPPORTED']
 elif arch_subdir == 'arm'
     sources += files('ixgbe_rxtx_vec_neon.c')
     sources += files('ixgbe_recycle_mbufs_vec_common.c')
+    cflags += ['-DIXGBE_VPMD_SUPPORTED']
 endif
 
 headers = files('rte_pmd_ixgbe.h')
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 10/25] net/ixgbe: replace always-true check
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (8 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 09/25] net/ixgbe: simplify vector PMD compilation Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 11/25] net/ixgbe: clean up definitions Anatoly Burakov
                     ` (14 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin; +Cc: bruce.richardson

There is an option `RTE_PMD_PACKET_PREFETCH` in `rte_config.h`, set to 1 by
default, which controls some prefetch behavior in the driver. However,
another prefetch behavior is controlled by `RTE_PMD_USE_PREFETCH`, which is
defined unconditionally (literally via an `#if 1` construct) and therefore
can never be disabled.

Replace that check with `RTE_PMD_PACKET_PREFETCH` as well, so that this
prefetch behavior can also be turned off at build time.
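
A rough before/after sketch (the rte_ixgbe_prefetch macro body is assumed
from the driver for illustration; the change below only touches the guard):

    /* before: guarded by a define that is itself set via "#if 1",
     * so the prefetch can never be disabled at build time
     */
    #ifdef RTE_PMD_USE_PREFETCH
    #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
    #endif

    /* after: reuse the existing rte_config.h option instead */
    #ifdef RTE_PMD_PACKET_PREFETCH
    #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
    #endif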

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Add this commit

 drivers/net/intel/ixgbe/ixgbe_rxtx.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index f5fd50584a..3b49ef9fbb 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -74,11 +74,7 @@
 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
 		(RTE_MBUF_F_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
 
-#if 1
-#define RTE_PMD_USE_PREFETCH
-#endif
-
-#ifdef RTE_PMD_USE_PREFETCH
+#ifdef RTE_PMD_PACKET_PREFETCH
 /*
  * Prefetch a cache line into all cache levels.
  */
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 11/25] net/ixgbe: clean up definitions
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (9 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 10/25] net/ixgbe: replace always-true check Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 12/25] net/i40e: " Anatoly Burakov
                     ` (13 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin; +Cc: bruce.richardson

This patch does the following cleanups:

- Remove the RTE_ prefix from internal definitions
- Mark vector-PMD related definitions with a special naming convention
  (see the brief example below)
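
For reference, the resulting names in ixgbe_rxtx.h (values taken from the
diff below):

    /* generic driver limits: RTE_/RTE_PMD_ prefixes dropped */
    #define IXGBE_TX_MAX_BURST            32
    #define IXGBE_RX_MAX_BURST            32
    #define IXGBE_TX_MAX_FREE_BUF_SZ      64

    /* vector-PMD-only values: marked with a VPMD infix */
    #define IXGBE_VPMD_DESCS_PER_LOOP     4
    #define IXGBE_VPMD_RXQ_REARM_THRESH   32
    #define IXGBE_VPMD_RX_BURST           IXGBE_VPMD_RXQ_REARM_THRESH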

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Add this commit

 drivers/net/intel/ixgbe/ixgbe_ipsec.c         | 10 ++--
 drivers/net/intel/ixgbe/ixgbe_rxtx.c          | 60 +++++++++----------
 drivers/net/intel/ixgbe/ixgbe_rxtx.h          | 22 +++----
 .../net/intel/ixgbe/ixgbe_rxtx_vec_common.h   |  2 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c | 56 ++++++++---------
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c  | 60 +++++++++----------
 6 files changed, 105 insertions(+), 105 deletions(-)

diff --git a/drivers/net/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/intel/ixgbe/ixgbe_ipsec.c
index 778004cbe4..df0964a51d 100644
--- a/drivers/net/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/intel/ixgbe/ixgbe_ipsec.c
@@ -15,20 +15,20 @@
 #include "ixgbe_ethdev.h"
 #include "ixgbe_ipsec.h"
 
-#define RTE_IXGBE_REGISTER_POLL_WAIT_5_MS  5
+#define IXGBE_REGISTER_POLL_WAIT_5_MS  5
 
 #define IXGBE_WAIT_RREAD \
 	IXGBE_WRITE_REG_THEN_POLL_MASK(hw, IXGBE_IPSRXIDX, reg_val, \
-	IPSRXIDX_READ, RTE_IXGBE_REGISTER_POLL_WAIT_5_MS)
+	IPSRXIDX_READ, IXGBE_REGISTER_POLL_WAIT_5_MS)
 #define IXGBE_WAIT_RWRITE \
 	IXGBE_WRITE_REG_THEN_POLL_MASK(hw, IXGBE_IPSRXIDX, reg_val, \
-	IPSRXIDX_WRITE, RTE_IXGBE_REGISTER_POLL_WAIT_5_MS)
+	IPSRXIDX_WRITE, IXGBE_REGISTER_POLL_WAIT_5_MS)
 #define IXGBE_WAIT_TREAD \
 	IXGBE_WRITE_REG_THEN_POLL_MASK(hw, IXGBE_IPSTXIDX, reg_val, \
-	IPSRXIDX_READ, RTE_IXGBE_REGISTER_POLL_WAIT_5_MS)
+	IPSRXIDX_READ, IXGBE_REGISTER_POLL_WAIT_5_MS)
 #define IXGBE_WAIT_TWRITE \
 	IXGBE_WRITE_REG_THEN_POLL_MASK(hw, IXGBE_IPSTXIDX, reg_val, \
-	IPSRXIDX_WRITE, RTE_IXGBE_REGISTER_POLL_WAIT_5_MS)
+	IPSRXIDX_WRITE, IXGBE_REGISTER_POLL_WAIT_5_MS)
 
 #define CMP_IP(a, b) (\
 	(a).ipv6[0] == (b).ipv6[0] && \
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index 3b49ef9fbb..22d0aea1a7 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -103,7 +103,7 @@ ixgbe_tx_free_bufs(struct ci_tx_queue *txq)
 	struct ci_tx_entry *txep;
 	uint32_t status;
 	int i, nb_free = 0;
-	struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
+	struct rte_mbuf *m, *free[IXGBE_TX_MAX_FREE_BUF_SZ];
 
 	/* check DD bit on threshold descriptor */
 	status = txq->ixgbe_tx_ring[txq->tx_next_dd].wb.status;
@@ -124,7 +124,7 @@ ixgbe_tx_free_bufs(struct ci_tx_queue *txq)
 		if (unlikely(m == NULL))
 			continue;
 
-		if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
+		if (nb_free >= IXGBE_TX_MAX_FREE_BUF_SZ ||
 		    (nb_free > 0 && m->pool != free[0]->pool)) {
 			rte_mempool_put_bulk(free[0]->pool,
 					     (void **)free, nb_free);
@@ -336,7 +336,7 @@ ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 		return 0;
 
 	/* Try to transmit at least chunks of TX_MAX_BURST pkts */
-	if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
+	if (likely(nb_pkts <= IXGBE_TX_MAX_BURST))
 		return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
 
 	/* transmit more than the max burst, in chunks of TX_MAX_BURST */
@@ -344,7 +344,7 @@ ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 	while (nb_pkts) {
 		uint16_t ret, n;
 
-		n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
+		n = (uint16_t)RTE_MIN(nb_pkts, IXGBE_TX_MAX_BURST);
 		ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
 		nb_tx = (uint16_t)(nb_tx + ret);
 		nb_pkts = (uint16_t)(nb_pkts - ret);
@@ -1590,7 +1590,7 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
 	 * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
 	 * reference packets that are ready to be received.
 	 */
-	for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
+	for (i = 0; i < IXGBE_RX_MAX_BURST;
 	     i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
 		/* Read desc statuses backwards to avoid race condition */
 		for (j = 0; j < LOOK_AHEAD; j++)
@@ -1787,7 +1787,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return 0;
 }
 
-/* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
+/* split requests into chunks of size IXGBE_RX_MAX_BURST */
 uint16_t
 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   uint16_t nb_pkts)
@@ -1797,7 +1797,7 @@ ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
 	if (unlikely(nb_pkts == 0))
 		return 0;
 
-	if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
+	if (likely(nb_pkts <= IXGBE_RX_MAX_BURST))
 		return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
 
 	/* request is relatively large, chunk it up */
@@ -1805,7 +1805,7 @@ ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
 	while (nb_pkts) {
 		uint16_t ret, n;
 
-		n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
+		n = (uint16_t)RTE_MIN(nb_pkts, IXGBE_RX_MAX_BURST);
 		ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
 		nb_rx = (uint16_t)(nb_rx + ret);
 		nb_pkts = (uint16_t)(nb_pkts - ret);
@@ -2510,8 +2510,8 @@ ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
 #ifdef RTE_LIB_SECURITY
 			!(txq->using_ipsec) &&
 #endif
-			txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
-		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
+			txq->tx_rs_thresh >= IXGBE_TX_MAX_BURST) {
+		if (txq->tx_rs_thresh <= IXGBE_TX_MAX_FREE_BUF_SZ &&
 				rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
 					txq->sw_ring_vec != NULL)) {
@@ -2666,10 +2666,10 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ci_tx_queue *txq)
 #ifdef RTE_LIB_SECURITY
 			!(txq->using_ipsec) &&
 #endif
-			(txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
+			(txq->tx_rs_thresh >= IXGBE_TX_MAX_BURST)) {
 		PMD_INIT_LOG(DEBUG, "Using simple tx code path");
 		dev->tx_pkt_prepare = NULL;
-		if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
+		if (txq->tx_rs_thresh <= IXGBE_TX_MAX_FREE_BUF_SZ &&
 				rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
 				(rte_eal_process_type() != RTE_PROC_PRIMARY ||
 					ixgbe_txq_vec_setup(txq) == 0)) {
@@ -2684,9 +2684,9 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ci_tx_queue *txq)
 				" - offloads = 0x%" PRIx64,
 				txq->offloads);
 		PMD_INIT_LOG(DEBUG,
-				" - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
+				" - tx_rs_thresh = %lu [IXGBE_TX_MAX_BURST=%lu]",
 				(unsigned long)txq->tx_rs_thresh,
-				(unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
+				(unsigned long)IXGBE_TX_MAX_BURST);
 		dev->tx_pkt_burst = ixgbe_xmit_pkts;
 		dev->tx_pkt_prepare = ixgbe_prep_pkts;
 	}
@@ -3031,17 +3031,17 @@ check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
 
 	/*
 	 * Make sure the following pre-conditions are satisfied:
-	 *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
+	 *   rxq->rx_free_thresh >= IXGBE_RX_MAX_BURST
 	 *   rxq->rx_free_thresh < rxq->nb_rx_desc
 	 *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
 	 * Scattered packets are not supported.  This should be checked
 	 * outside of this function.
 	 */
-	if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
+	if (!(rxq->rx_free_thresh >= IXGBE_RX_MAX_BURST)) {
 		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
 			     "rxq->rx_free_thresh=%d, "
-			     "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
-			     rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
+			     "IXGBE_RX_MAX_BURST=%d",
+			     rxq->rx_free_thresh, IXGBE_RX_MAX_BURST);
 		ret = -EINVAL;
 	} else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
 		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
@@ -3075,7 +3075,7 @@ ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
 	 */
 	if (adapter->rx_bulk_alloc_allowed)
 		/* zero out extra memory */
-		len += RTE_PMD_IXGBE_RX_MAX_BURST;
+		len += IXGBE_RX_MAX_BURST;
 
 	/*
 	 * Zero out HW ring memory. Zero out extra memory at the end of
@@ -3319,7 +3319,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	 */
 	len = nb_desc;
 	if (adapter->rx_bulk_alloc_allowed)
-		len += RTE_PMD_IXGBE_RX_MAX_BURST;
+		len += IXGBE_RX_MAX_BURST;
 
 	rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
 					  sizeof(struct ixgbe_rx_entry) * len,
@@ -4617,7 +4617,7 @@ ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
 	/* PFDMA Tx General Switch Control Enables VMDQ loopback */
 	if (cfg->enable_loop_back) {
 		IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
-		for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
+		for (i = 0; i < IXGBE_VMTXSW_REGISTER_COUNT; i++)
 			IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
 	}
 
@@ -5071,7 +5071,7 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 	} else if (adapter->rx_vec_allowed) {
 		PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
 				    "burst size no less than %d (port=%d).",
-			     RTE_IXGBE_DESCS_PER_LOOP,
+			     IXGBE_VPMD_DESCS_PER_LOOP,
 			     dev->data->port_id);
 		dev->recycle_rx_descriptors_refill = ixgbe_recycle_rx_descriptors_refill_vec;
 		dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
@@ -5655,7 +5655,7 @@ ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
 
 	/* Wait until RX Enable ready */
-	poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
+	poll_ms = IXGBE_REGISTER_POLL_WAIT_10_MS;
 	do {
 		rte_delay_ms(1);
 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
@@ -5692,7 +5692,7 @@ ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
 
 	/* Wait until RX Enable bit clear */
-	poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
+	poll_ms = IXGBE_REGISTER_POLL_WAIT_10_MS;
 	do {
 		rte_delay_ms(1);
 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
@@ -5700,7 +5700,7 @@ ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	if (!poll_ms)
 		PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
 
-	rte_delay_us(RTE_IXGBE_WAIT_100_US);
+	rte_delay_us(IXGBE_WAIT_100_US);
 
 	ixgbe_rx_queue_release_mbufs(rxq);
 	ixgbe_reset_rx_queue(adapter, rxq);
@@ -5732,7 +5732,7 @@ ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 
 	/* Wait until TX Enable ready */
 	if (hw->mac.type == ixgbe_mac_82599EB) {
-		poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
+		poll_ms = IXGBE_REGISTER_POLL_WAIT_10_MS;
 		do {
 			rte_delay_ms(1);
 			txdctl = IXGBE_READ_REG(hw,
@@ -5768,9 +5768,9 @@ ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 
 	/* Wait until TX queue is empty */
 	if (hw->mac.type == ixgbe_mac_82599EB) {
-		poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
+		poll_ms = IXGBE_REGISTER_POLL_WAIT_10_MS;
 		do {
-			rte_delay_us(RTE_IXGBE_WAIT_100_US);
+			rte_delay_us(IXGBE_WAIT_100_US);
 			txtdh = IXGBE_READ_REG(hw,
 					       IXGBE_TDH(txq->reg_idx));
 			txtdt = IXGBE_READ_REG(hw,
@@ -5788,7 +5788,7 @@ ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 
 	/* Wait until TX Enable bit clear */
 	if (hw->mac.type == ixgbe_mac_82599EB) {
-		poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
+		poll_ms = IXGBE_REGISTER_POLL_WAIT_10_MS;
 		do {
 			rte_delay_ms(1);
 			txdctl = IXGBE_READ_REG(hw,
@@ -5861,7 +5861,7 @@ ixgbe_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	recycle_rxq_info->receive_tail = &rxq->rx_tail;
 
 	if (adapter->rx_vec_allowed) {
-		recycle_rxq_info->refill_requirement = RTE_IXGBE_RXQ_REARM_THRESH;
+		recycle_rxq_info->refill_requirement = IXGBE_VPMD_RXQ_REARM_THRESH;
 		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
 	} else {
 		recycle_rxq_info->refill_requirement = rxq->rx_free_thresh;
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index bcd5db87e8..5742e845cf 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -29,16 +29,16 @@
 #define	IXGBE_MIN_RING_DESC	64
 #define	IXGBE_MAX_RING_DESC	8192
 
-#define RTE_PMD_IXGBE_TX_MAX_BURST 32
-#define RTE_PMD_IXGBE_RX_MAX_BURST 32
-#define RTE_IXGBE_TX_MAX_FREE_BUF_SZ 64
+#define IXGBE_TX_MAX_BURST            32
+#define IXGBE_RX_MAX_BURST            32
+#define IXGBE_TX_MAX_FREE_BUF_SZ      64
 
-#define RTE_IXGBE_DESCS_PER_LOOP    4
+#define IXGBE_VPMD_DESCS_PER_LOOP     4
 
-#define RTE_IXGBE_RXQ_REARM_THRESH      32
-#define RTE_IXGBE_MAX_RX_BURST          RTE_IXGBE_RXQ_REARM_THRESH
+#define IXGBE_VPMD_RXQ_REARM_THRESH   32
+#define IXGBE_VPMD_RX_BURST           IXGBE_VPMD_RXQ_REARM_THRESH
 
-#define RX_RING_SZ ((IXGBE_MAX_RING_DESC + RTE_PMD_IXGBE_RX_MAX_BURST) * \
+#define RX_RING_SZ ((IXGBE_MAX_RING_DESC + IXGBE_RX_MAX_BURST) * \
 		    sizeof(union ixgbe_adv_rx_desc))
 
 #ifdef RTE_PMD_PACKET_PREFETCH
@@ -47,9 +47,9 @@
 #define rte_packet_prefetch(p)  do {} while(0)
 #endif
 
-#define RTE_IXGBE_REGISTER_POLL_WAIT_10_MS  10
-#define RTE_IXGBE_WAIT_100_US               100
-#define RTE_IXGBE_VMTXSW_REGISTER_COUNT     2
+#define IXGBE_REGISTER_POLL_WAIT_10_MS  10
+#define IXGBE_WAIT_100_US               100
+#define IXGBE_VMTXSW_REGISTER_COUNT     2
 
 #define IXGBE_TX_MAX_SEG                    40
 
@@ -118,7 +118,7 @@ struct ixgbe_rx_queue {
 	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
 	struct rte_mbuf fake_mbuf;
 	/** hold packets to return to application */
-	struct rte_mbuf *rx_stage[RTE_PMD_IXGBE_RX_MAX_BURST*2];
+	struct rte_mbuf *rx_stage[IXGBE_RX_MAX_BURST * 2];
 	const struct rte_memzone *mz;
 };
 
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
index 018010820f..4442dc7b39 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
@@ -19,7 +19,7 @@ ixgbe_tx_free_bufs(struct ci_tx_queue *txq)
 	uint32_t n;
 	uint32_t i;
 	int nb_free = 0;
-	struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
+	struct rte_mbuf *m, *free[IXGBE_TX_MAX_FREE_BUF_SZ];
 
 	/* check DD bit on threshold descriptor */
 	status = txq->ixgbe_tx_ring[txq->tx_next_dd].wb.status;
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
index f8916d44e8..02d9dbb573 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -29,24 +29,24 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 	/* Pull 'n' more MBUFs into the software ring */
 	if (unlikely(rte_mempool_get_bulk(rxq->mp,
 					  (void *)rxep,
-					  RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
+					  IXGBE_VPMD_RXQ_REARM_THRESH) < 0)) {
+		if (rxq->rxrearm_nb + IXGBE_VPMD_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
-			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
+			for (i = 0; i < IXGBE_VPMD_DESCS_PER_LOOP; i++) {
 				rxep[i].mbuf = &rxq->fake_mbuf;
 				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read),
 					  zero);
 			}
 		}
 		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_IXGBE_RXQ_REARM_THRESH;
+			IXGBE_VPMD_RXQ_REARM_THRESH;
 		return;
 	}
 
 	p = vld1_u8((uint8_t *)&rxq->mbuf_initializer);
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+	for (i = 0; i < IXGBE_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		mb0 = rxep[0].mbuf;
 		mb1 = rxep[1].mbuf;
 
@@ -66,11 +66,11 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
 	}
 
-	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += IXGBE_VPMD_RXQ_REARM_THRESH;
 	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
 		rxq->rxrearm_start = 0;
 
-	rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= IXGBE_VPMD_RXQ_REARM_THRESH;
 
 	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
@@ -275,11 +275,11 @@ desc_to_ptype_v(uint64x2_t descs[4], uint16_t pkt_type_mask,
 }
 
 /**
- * vPMD raw receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
+ * vPMD raw receive routine, only accept(nb_pkts >= IXGBE_VPMD_DESCS_PER_LOOP)
  *
  * Notice:
- * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
- * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
+ * - nb_pkts < IXGBE_VPMD_DESCS_PER_LOOP, just return no packet
+ * - floor align nb_pkts to a IXGBE_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
 _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -303,8 +303,8 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	uint8_t vlan_flags;
 	uint16_t udp_p_flag = 0; /* Rx Descriptor UDP header present */
 
-	/* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
+	/* nb_pkts has to be floor-aligned to IXGBE_VPMD_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IXGBE_VPMD_DESCS_PER_LOOP);
 
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
@@ -316,7 +316,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_IXGBE_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > IXGBE_VPMD_RXQ_REARM_THRESH)
 		ixgbe_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -345,9 +345,9 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	 * D. fill info. from desc to mbuf
 	 */
 	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
-			pos += RTE_IXGBE_DESCS_PER_LOOP,
-			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
-		uint64x2_t descs[RTE_IXGBE_DESCS_PER_LOOP];
+			pos += IXGBE_VPMD_DESCS_PER_LOOP,
+			rxdp += IXGBE_VPMD_DESCS_PER_LOOP) {
+		uint64x2_t descs[IXGBE_VPMD_DESCS_PER_LOOP];
 		uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
 		uint8x16x2_t sterr_tmp1, sterr_tmp2;
 		uint64x2_t mbp1, mbp2;
@@ -426,7 +426,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			/* and with mask to extract bits, flipping 1-0 */
 			*(int *)split_packet = ~stat & IXGBE_VPMD_DESC_EOP_MASK;
 
-			split_packet += RTE_IXGBE_DESCS_PER_LOOP;
+			split_packet += IXGBE_VPMD_DESCS_PER_LOOP;
 		}
 
 		/* C.4 expand DD bit to saturate UINT8 */
@@ -436,7 +436,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 					IXGBE_UINT8_BIT - 1));
 		stat = ~vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
 
-		rte_prefetch_non_temporal(rxdp + RTE_IXGBE_DESCS_PER_LOOP);
+		rte_prefetch_non_temporal(rxdp + IXGBE_VPMD_DESCS_PER_LOOP);
 
 		/* D.3 copy final 1,2 data to rx_pkts */
 		vst1q_u8((uint8_t *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
@@ -448,7 +448,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 		/* C.5 calc available number of desc */
 		if (unlikely(stat == 0)) {
-			nb_pkts_recd += RTE_IXGBE_DESCS_PER_LOOP;
+			nb_pkts_recd += IXGBE_VPMD_DESCS_PER_LOOP;
 		} else {
 			nb_pkts_recd += rte_ctz32(stat) / IXGBE_UINT8_BIT;
 			break;
@@ -464,11 +464,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 }
 
 /**
- * vPMD receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
+ * vPMD receive routine, only accept(nb_pkts >= IXGBE_VPMD_DESCS_PER_LOOP)
  *
  * Notice:
- * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
- * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
+ * - nb_pkts < IXGBE_VPMD_DESCS_PER_LOOP, just return no packet
+ * - floor align nb_pkts to a IXGBE_VPMD_DESCS_PER_LOOP power-of-two
  */
 uint16_t
 ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -481,15 +481,15 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
  * vPMD receive routine that reassembles scattered packets
  *
  * Notice:
- * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
- * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
+ * - nb_pkts < IXGBE_VPMD_DESCS_PER_LOOP, just return no packet
+ * - floor align nb_pkts to a IXGBE_VPMD_DESCS_PER_LOOP power-of-two
  */
 static uint16_t
 ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts)
 {
 	struct ixgbe_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
+	uint8_t split_flags[IXGBE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
@@ -527,15 +527,15 @@ ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > RTE_IXGBE_MAX_RX_BURST) {
+	while (nb_pkts > IXGBE_VPMD_RX_BURST) {
 		uint16_t burst;
 
 		burst = ixgbe_recv_scattered_burst_vec(rx_queue,
 						       rx_pkts + retval,
-						       RTE_IXGBE_MAX_RX_BURST);
+						       IXGBE_VPMD_RX_BURST);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < RTE_IXGBE_MAX_RX_BURST)
+		if (burst < IXGBE_VPMD_RX_BURST)
 			return retval;
 	}
 
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
index 9417e5b11f..ea57631932 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -31,23 +31,23 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
 				 (void *)rxep,
-				 RTE_IXGBE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
+				 IXGBE_VPMD_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + IXGBE_VPMD_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
 			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
+			for (i = 0; i < IXGBE_VPMD_DESCS_PER_LOOP; i++) {
 				rxep[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
 		}
 		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_IXGBE_RXQ_REARM_THRESH;
+			IXGBE_VPMD_RXQ_REARM_THRESH;
 		return;
 	}
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+	for (i = 0; i < IXGBE_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		__m128i vaddr0, vaddr1;
 
 		mb0 = rxep[0].mbuf;
@@ -76,11 +76,11 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
 	}
 
-	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += IXGBE_VPMD_RXQ_REARM_THRESH;
 	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
 		rxq->rxrearm_start = 0;
 
-	rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= IXGBE_VPMD_RXQ_REARM_THRESH;
 
 	rx_id = (uint16_t) ((rxq->rxrearm_start == 0) ?
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
@@ -262,10 +262,10 @@ static inline uint32_t get_packet_type(int index,
 				       uint32_t etqf_check,
 				       uint32_t tunnel_check)
 {
-	if (etqf_check & (0x02 << (index * RTE_IXGBE_DESCS_PER_LOOP)))
+	if (etqf_check & (0x02 << (index * IXGBE_VPMD_DESCS_PER_LOOP)))
 		return RTE_PTYPE_UNKNOWN;
 
-	if (tunnel_check & (0x02 << (index * RTE_IXGBE_DESCS_PER_LOOP))) {
+	if (tunnel_check & (0x02 << (index * IXGBE_VPMD_DESCS_PER_LOOP))) {
 		pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
 		return ptype_table_tn[pkt_info];
 	}
@@ -320,11 +320,11 @@ desc_to_ptype_v(__m128i descs[4], uint16_t pkt_type_mask,
 }
 
 /**
- * vPMD raw receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
+ * vPMD raw receive routine, only accept(nb_pkts >= IXGBE_VPMD_DESCS_PER_LOOP)
  *
  * Notice:
- * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
- * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
+ * - nb_pkts < IXGBE_VPMD_DESCS_PER_LOOP, just return no packet
+ * - floor align nb_pkts to a IXGBE_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
 _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -369,10 +369,10 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	 * So we need to make some restrictions to ensure that
 	 * `rx_tail` will not exceed `rxrearm_start`.
 	 */
-	nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_RXQ_REARM_THRESH);
+	nb_pkts = RTE_MIN(nb_pkts, IXGBE_VPMD_RXQ_REARM_THRESH);
 
-	/* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
+	/* nb_pkts has to be floor-aligned to IXGBE_VPMD_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IXGBE_VPMD_DESCS_PER_LOOP);
 
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
@@ -384,7 +384,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_IXGBE_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > IXGBE_VPMD_RXQ_REARM_THRESH)
 		ixgbe_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -446,9 +446,9 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	 * D. fill info. from desc to mbuf
 	 */
 	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
-			pos += RTE_IXGBE_DESCS_PER_LOOP,
-			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
-		__m128i descs[RTE_IXGBE_DESCS_PER_LOOP];
+			pos += IXGBE_VPMD_DESCS_PER_LOOP,
+			rxdp += IXGBE_VPMD_DESCS_PER_LOOP) {
+		__m128i descs[IXGBE_VPMD_DESCS_PER_LOOP];
 		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
 		__m128i zero, staterr, sterr_tmp1, sterr_tmp2;
 		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
@@ -554,7 +554,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
 			/* store the resulting 32-bit value */
 			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
-			split_packet += RTE_IXGBE_DESCS_PER_LOOP;
+			split_packet += IXGBE_VPMD_DESCS_PER_LOOP;
 		}
 
 		/* C.3 calc available number of desc */
@@ -572,7 +572,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		/* C.4 calc available number of desc */
 		var = rte_popcount64(_mm_cvtsi128_si64(staterr));
 		nb_pkts_recd += var;
-		if (likely(var != RTE_IXGBE_DESCS_PER_LOOP))
+		if (likely(var != IXGBE_VPMD_DESCS_PER_LOOP))
 			break;
 	}
 
@@ -585,11 +585,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 }
 
 /**
- * vPMD receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
+ * vPMD receive routine, only accept(nb_pkts >= IXGBE_VPMD_DESCS_PER_LOOP)
  *
  * Notice:
- * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
- * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
+ * - nb_pkts < IXGBE_VPMD_DESCS_PER_LOOP, just return no packet
+ * - floor align nb_pkts to a IXGBE_VPMD_DESCS_PER_LOOP power-of-two
  */
 uint16_t
 ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -602,15 +602,15 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
  * vPMD receive routine that reassembles scattered packets
  *
  * Notice:
- * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
- * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
+ * - nb_pkts < IXGBE_VPMD_DESCS_PER_LOOP, just return no packet
+ * - floor align nb_pkts to a IXGBE_VPMD_DESCS_PER_LOOP power-of-two
  */
 static uint16_t
 ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts)
 {
 	struct ixgbe_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
+	uint8_t split_flags[IXGBE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
@@ -648,15 +648,15 @@ ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > RTE_IXGBE_MAX_RX_BURST) {
+	while (nb_pkts > IXGBE_VPMD_RX_BURST) {
 		uint16_t burst;
 
 		burst = ixgbe_recv_scattered_burst_vec(rx_queue,
 						       rx_pkts + retval,
-						       RTE_IXGBE_MAX_RX_BURST);
+						       IXGBE_VPMD_RX_BURST);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < RTE_IXGBE_MAX_RX_BURST)
+		if (burst < IXGBE_VPMD_RX_BURST)
 			return retval;
 	}
 
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 12/25] net/i40e: clean up definitions
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (10 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 11/25] net/ixgbe: clean up definitions Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 13/25] net/ice: " Anatoly Burakov
                     ` (12 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Ian Stokes, Bruce Richardson

This commit does the following cleanups:

- Remove the RTE_ prefix from internal definitions
- Mark vector-PMD related definitions with a special naming convention
- Remove unused definitions
- Create separate "descriptors per loop" definitions for the different
  vector implementations (regular for SSE, Neon, and AltiVec; wide for
  AVX2 and AVX512), as sketched below
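
A minimal sketch of how the two loop widths are meant to be used (the macro
names and values match the diff below; the helper itself is hypothetical and
only illustrates the pattern):

    #define I40E_VPMD_DESCS_PER_LOOP      4  /* SSE / Neon / AltiVec paths */
    #define I40E_VPMD_DESCS_PER_LOOP_WIDE 8  /* AVX2 / AVX512 paths */

    /* hypothetical helper: floor-align a burst to the wide loop width */
    static inline uint16_t
    i40e_align_burst_wide(uint16_t nb_pkts)
    {
            return RTE_ALIGN_FLOOR(nb_pkts, I40E_VPMD_DESCS_PER_LOOP_WIDE);
    }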

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Add this commit

 drivers/net/intel/i40e/i40e_rxtx.c            | 42 +++++++--------
 drivers/net/intel/i40e/i40e_rxtx.h            | 17 +++---
 drivers/net/intel/i40e/i40e_rxtx_common_avx.h | 18 +++----
 .../net/intel/i40e/i40e_rxtx_vec_altivec.c    | 48 ++++++++---------
 drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   | 32 ++++++-----
 drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c | 32 ++++++-----
 drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   | 53 +++++++++----------
 drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    | 48 ++++++++---------
 8 files changed, 142 insertions(+), 148 deletions(-)

diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index 5f54bcc225..2e61076378 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -424,11 +424,11 @@ check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq)
 	int ret = 0;
 
 #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
-	if (!(rxq->rx_free_thresh >= RTE_PMD_I40E_RX_MAX_BURST)) {
+	if (!(rxq->rx_free_thresh >= I40E_RX_MAX_BURST)) {
 		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
 			     "rxq->rx_free_thresh=%d, "
-			     "RTE_PMD_I40E_RX_MAX_BURST=%d",
-			     rxq->rx_free_thresh, RTE_PMD_I40E_RX_MAX_BURST);
+			     "I40E_RX_MAX_BURST=%d",
+			     rxq->rx_free_thresh, I40E_RX_MAX_BURST);
 		ret = -EINVAL;
 	} else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
 		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
@@ -484,7 +484,7 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
 	 * Scan LOOK_AHEAD descriptors at a time to determine which
 	 * descriptors reference packets that are ready to be received.
 	 */
-	for (i = 0; i < RTE_PMD_I40E_RX_MAX_BURST; i+=I40E_LOOK_AHEAD,
+	for (i = 0; i < I40E_RX_MAX_BURST; i += I40E_LOOK_AHEAD,
 			rxdp += I40E_LOOK_AHEAD, rxep += I40E_LOOK_AHEAD) {
 		/* Read desc statuses backwards to avoid race condition */
 		for (j = I40E_LOOK_AHEAD - 1; j >= 0; j--) {
@@ -680,11 +680,11 @@ i40e_recv_pkts_bulk_alloc(void *rx_queue,
 	if (unlikely(nb_pkts == 0))
 		return 0;
 
-	if (likely(nb_pkts <= RTE_PMD_I40E_RX_MAX_BURST))
+	if (likely(nb_pkts <= I40E_RX_MAX_BURST))
 		return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
 
 	while (nb_pkts) {
-		n = RTE_MIN(nb_pkts, RTE_PMD_I40E_RX_MAX_BURST);
+		n = RTE_MIN(nb_pkts, I40E_RX_MAX_BURST);
 		count = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
 		nb_rx = (uint16_t)(nb_rx + count);
 		nb_pkts = (uint16_t)(nb_pkts - count);
@@ -1334,9 +1334,9 @@ i40e_tx_free_bufs(struct ci_tx_queue *txq)
 	struct ci_tx_entry *txep;
 	uint16_t tx_rs_thresh = txq->tx_rs_thresh;
 	uint16_t i = 0, j = 0;
-	struct rte_mbuf *free[RTE_I40E_TX_MAX_FREE_BUF_SZ];
-	const uint16_t k = RTE_ALIGN_FLOOR(tx_rs_thresh, RTE_I40E_TX_MAX_FREE_BUF_SZ);
-	const uint16_t m = tx_rs_thresh % RTE_I40E_TX_MAX_FREE_BUF_SZ;
+	struct rte_mbuf *free[I40E_TX_MAX_FREE_BUF_SZ];
+	const uint16_t k = RTE_ALIGN_FLOOR(tx_rs_thresh, I40E_TX_MAX_FREE_BUF_SZ);
+	const uint16_t m = tx_rs_thresh % I40E_TX_MAX_FREE_BUF_SZ;
 
 	if ((txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
 			rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
@@ -1350,13 +1350,13 @@ i40e_tx_free_bufs(struct ci_tx_queue *txq)
 
 	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
 		if (k) {
-			for (j = 0; j != k; j += RTE_I40E_TX_MAX_FREE_BUF_SZ) {
-				for (i = 0; i < RTE_I40E_TX_MAX_FREE_BUF_SZ; ++i, ++txep) {
+			for (j = 0; j != k; j += I40E_TX_MAX_FREE_BUF_SZ) {
+				for (i = 0; i < I40E_TX_MAX_FREE_BUF_SZ; ++i, ++txep) {
 					free[i] = txep->mbuf;
 					txep->mbuf = NULL;
 				}
 				rte_mempool_put_bulk(free[0]->pool, (void **)free,
-						RTE_I40E_TX_MAX_FREE_BUF_SZ);
+						I40E_TX_MAX_FREE_BUF_SZ);
 			}
 		}
 
@@ -2146,7 +2146,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	 * Allocating a little more memory because vectorized/bulk_alloc Rx
 	 * functions doesn't check boundaries each time.
 	 */
-	len += RTE_PMD_I40E_RX_MAX_BURST;
+	len += I40E_RX_MAX_BURST;
 
 	ring_size = RTE_ALIGN(len * sizeof(union i40e_rx_desc),
 			      I40E_DMA_MEM_ALIGN);
@@ -2166,7 +2166,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	rxq->rx_ring_phys_addr = rz->iova;
 	rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
 
-	len = (uint16_t)(nb_desc + RTE_PMD_I40E_RX_MAX_BURST);
+	len = (uint16_t)(nb_desc + I40E_RX_MAX_BURST);
 
 	/* Allocate the software ring. */
 	rxq->sw_ring =
@@ -2370,7 +2370,7 @@ i40e_dev_tx_queue_setup_runtime(struct rte_eth_dev *dev,
 
 	/* check vector conflict */
 	if (ad->tx_vec_allowed) {
-		if (txq->tx_rs_thresh > RTE_I40E_TX_MAX_FREE_BUF_SZ ||
+		if (txq->tx_rs_thresh > I40E_TX_MAX_FREE_BUF_SZ ||
 		    i40e_txq_vec_setup(txq)) {
 			PMD_DRV_LOG(ERR, "Failed vector tx setup.");
 			return -EINVAL;
@@ -2379,7 +2379,7 @@ i40e_dev_tx_queue_setup_runtime(struct rte_eth_dev *dev,
 	/* check simple tx conflict */
 	if (ad->tx_simple_allowed) {
 		if ((txq->offloads & ~RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) != 0 ||
-				txq->tx_rs_thresh < RTE_PMD_I40E_TX_MAX_BURST) {
+				txq->tx_rs_thresh < I40E_TX_MAX_BURST) {
 			PMD_DRV_LOG(ERR, "No-simple tx is required.");
 			return -EINVAL;
 		}
@@ -2675,7 +2675,7 @@ i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
 
 #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
 	if (check_rx_burst_bulk_alloc_preconditions(rxq) == 0)
-		len = (uint16_t)(rxq->nb_rx_desc + RTE_PMD_I40E_RX_MAX_BURST);
+		len = (uint16_t)(rxq->nb_rx_desc + I40E_RX_MAX_BURST);
 	else
 #endif /* RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC */
 		len = rxq->nb_rx_desc;
@@ -2684,7 +2684,7 @@ i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
 		((volatile char *)rxq->rx_ring)[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
-	for (i = 0; i < RTE_PMD_I40E_RX_MAX_BURST; ++i)
+	for (i = 0; i < I40E_RX_MAX_BURST; ++i)
 		rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;
 
 #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
@@ -3276,7 +3276,7 @@ i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	recycle_rxq_info->receive_tail = &rxq->rx_tail;
 
 	if (ad->rx_vec_allowed) {
-		recycle_rxq_info->refill_requirement = RTE_I40E_RXQ_REARM_THRESH;
+		recycle_rxq_info->refill_requirement = I40E_VPMD_RXQ_REARM_THRESH;
 		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
 	} else {
 		recycle_rxq_info->refill_requirement = rxq->rx_free_thresh;
@@ -3501,9 +3501,9 @@ i40e_set_tx_function_flag(struct rte_eth_dev *dev, struct ci_tx_queue *txq)
 	ad->tx_simple_allowed =
 		(txq->offloads ==
 		 (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) &&
-		 txq->tx_rs_thresh >= RTE_PMD_I40E_TX_MAX_BURST);
+		 txq->tx_rs_thresh >= I40E_TX_MAX_BURST);
 	ad->tx_vec_allowed = (ad->tx_simple_allowed &&
-			txq->tx_rs_thresh <= RTE_I40E_TX_MAX_FREE_BUF_SZ);
+			txq->tx_rs_thresh <= I40E_TX_MAX_FREE_BUF_SZ);
 
 	if (ad->tx_vec_allowed)
 		PMD_INIT_LOG(DEBUG, "Vector Tx can be enabled on Tx queue %u.",
diff --git a/drivers/net/intel/i40e/i40e_rxtx.h b/drivers/net/intel/i40e/i40e_rxtx.h
index 568f0536ac..3dca32b1ba 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx.h
@@ -7,15 +7,14 @@
 
 #include "../common/tx.h"
 
-#define RTE_PMD_I40E_RX_MAX_BURST 32
-#define RTE_PMD_I40E_TX_MAX_BURST 32
+#define I40E_RX_MAX_BURST 32
+#define I40E_TX_MAX_BURST 32
 
-#define RTE_I40E_VPMD_RX_BURST        32
-#define RTE_I40E_VPMD_TX_BURST        32
-#define RTE_I40E_RXQ_REARM_THRESH      32
-#define RTE_I40E_MAX_RX_BURST          RTE_I40E_RXQ_REARM_THRESH
-#define RTE_I40E_TX_MAX_FREE_BUF_SZ    64
-#define RTE_I40E_DESCS_PER_LOOP    4
+#define I40E_VPMD_RX_BURST            32
+#define I40E_VPMD_RXQ_REARM_THRESH    32
+#define I40E_TX_MAX_FREE_BUF_SZ       64
+#define I40E_VPMD_DESCS_PER_LOOP      4
+#define I40E_VPMD_DESCS_PER_LOOP_WIDE 8
 
 #define I40E_RXBUF_SZ_1024 1024
 #define I40E_RXBUF_SZ_2048 2048
@@ -97,7 +96,7 @@ struct i40e_rx_queue {
 	uint16_t rx_nb_avail; /**< number of staged packets ready */
 	uint16_t rx_next_avail; /**< index of next staged packets */
 	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	struct rte_mbuf *rx_stage[RTE_PMD_I40E_RX_MAX_BURST * 2];
+	struct rte_mbuf *rx_stage[I40E_RX_MAX_BURST * 2];
 #endif
 
 	uint16_t rxrearm_nb;	/**< number of remaining to be re-armed */
diff --git a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
index 7d2bda624b..8fc7cd5bd4 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
@@ -25,19 +25,19 @@ i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
 				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+				 I40E_VPMD_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + I40E_VPMD_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
 			__m128i dma_addr0;
 			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+			for (i = 0; i < I40E_VPMD_DESCS_PER_LOOP; i++) {
 				rxep[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
 		}
 		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
+			I40E_VPMD_RXQ_REARM_THRESH;
 		return;
 	}
 
@@ -47,7 +47,7 @@ i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+	for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		__m128i vaddr0, vaddr1;
 
 		mb0 = rxep[0].mbuf;
@@ -79,7 +79,7 @@ i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
 		__m512i dma_addr0_3, dma_addr4_7;
 		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
 		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+		for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH;
 				i += 8, rxep += 8, rxdp += 8) {
 			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
 			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
@@ -152,7 +152,7 @@ i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
 		__m256i dma_addr0_1, dma_addr2_3;
 		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
 		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+		for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH;
 				i += 4, rxep += 4, rxdp += 4) {
 			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
 			__m256i vaddr0_1, vaddr2_3;
@@ -197,7 +197,7 @@ i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
 
 #endif
 
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += I40E_VPMD_RXQ_REARM_THRESH;
 	rx_id = rxq->rxrearm_start - 1;
 
 	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
@@ -205,7 +205,7 @@ i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
 		rx_id = rxq->nb_rx_desc - 1;
 	}
 
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= I40E_VPMD_RXQ_REARM_THRESH;
 
 	/* Update the tail pointer on the NIC */
 	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
index 01dee811ba..568891cfb2 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
@@ -35,23 +35,23 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
 				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+				 I40E_VPMD_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + I40E_VPMD_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
 			dma_addr0 = (__vector unsigned long){};
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+			for (i = 0; i < I40E_VPMD_DESCS_PER_LOOP; i++) {
 				rxep[i].mbuf = &rxq->fake_mbuf;
 				vec_st(dma_addr0, 0,
 					RTE_CAST_PTR(__vector unsigned long *, &rxdp[i].read));
 			}
 		}
 		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
+			I40E_VPMD_RXQ_REARM_THRESH;
 		return;
 	}
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+	for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		__vector unsigned long vaddr0, vaddr1;
 		uintptr_t p0, p1;
 
@@ -86,7 +86,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 		vec_st(dma_addr1, 0, RTE_CAST_PTR(__vector unsigned long *, &rxdp++->read));
 	}
 
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += I40E_VPMD_RXQ_REARM_THRESH;
 	rx_id = rxq->rxrearm_start - 1;
 
 	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
@@ -94,7 +94,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 		rx_id = rxq->nb_rx_desc - 1;
 	}
 
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= I40E_VPMD_RXQ_REARM_THRESH;
 
 	/* Update the tail pointer on the NIC */
 	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
@@ -188,11 +188,11 @@ desc_to_ptype_v(__vector unsigned long descs[4], struct rte_mbuf **rx_pkts,
 }
 
 /**
- * vPMD raw receive routine, only accept(nb_pkts >= RTE_I40E_DESCS_PER_LOOP)
+ * vPMD raw receive routine, only accept(nb_pkts >= I40E_VPMD_DESCS_PER_LOOP)
  *
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
- * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
+ * - floor align nb_pkts to a I40E_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
 _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -215,8 +215,8 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		};
 	__vector unsigned long dd_check, eop_check;
 
-	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP);
+	/* nb_pkts has to be floor-aligned to I40E_VPMD_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, I40E_VPMD_DESCS_PER_LOOP);
 
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
@@ -228,7 +228,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > I40E_VPMD_RXQ_REARM_THRESH)
 		i40e_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -271,9 +271,9 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	 */
 
 	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
-			pos += RTE_I40E_DESCS_PER_LOOP,
-			rxdp += RTE_I40E_DESCS_PER_LOOP) {
-		__vector unsigned long descs[RTE_I40E_DESCS_PER_LOOP];
+			pos += I40E_VPMD_DESCS_PER_LOOP,
+			rxdp += I40E_VPMD_DESCS_PER_LOOP) {
+		__vector unsigned long descs[I40E_VPMD_DESCS_PER_LOOP];
 		__vector unsigned char pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
 		__vector unsigned short staterr, sterr_tmp1, sterr_tmp2;
 		__vector unsigned long mbp1, mbp2; /* two mbuf pointer
@@ -406,7 +406,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			/* store the resulting 32-bit value */
 			*split_packet = (vec_ld(0,
 					 (__vector unsigned int *)&eop_bits))[0];
-			split_packet += RTE_I40E_DESCS_PER_LOOP;
+			split_packet += I40E_VPMD_DESCS_PER_LOOP;
 
 			/* zero-out next pointers */
 			rx_pkts[pos]->next = NULL;
@@ -433,7 +433,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		var = rte_popcount64((vec_ld(0,
 			(__vector unsigned long *)&staterr)[0]));
 		nb_pkts_recd += var;
-		if (likely(var != RTE_I40E_DESCS_PER_LOOP))
+		if (likely(var != I40E_VPMD_DESCS_PER_LOOP))
 			break;
 	}
 
@@ -446,7 +446,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 }
 
  /* Notice:
-  * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+  * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
   */
 uint16_t
 i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -459,14 +459,14 @@ i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  *
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
 	struct i40e_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
+	uint8_t split_flags[I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
@@ -505,15 +505,15 @@ i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > RTE_I40E_VPMD_RX_BURST) {
+	while (nb_pkts > I40E_VPMD_RX_BURST) {
 		uint16_t burst;
 
 		burst = i40e_recv_scattered_burst_vec(rx_queue,
 						      rx_pkts + retval,
-						      RTE_I40E_VPMD_RX_BURST);
+						      I40E_VPMD_RX_BURST);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < RTE_I40E_VPMD_RX_BURST)
+		if (burst < I40E_VPMD_RX_BURST)
 			return retval;
 	}
 
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index 4469c73c56..a13dd9bc78 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -108,8 +108,6 @@ static __rte_always_inline uint16_t
 _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts, uint8_t *split_packet)
 {
-#define RTE_I40E_DESCS_PER_LOOP_AVX 8
-
 	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
@@ -118,13 +116,13 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 	rte_prefetch0(rxdp);
 
-	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP_AVX */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP_AVX);
+	/* nb_pkts has to be floor-aligned to I40E_VPMD_DESCS_PER_LOOP_WIDE */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, I40E_VPMD_DESCS_PER_LOOP_WIDE);
 
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > I40E_VPMD_RXQ_REARM_THRESH)
 		i40e_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -262,8 +260,8 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 	uint16_t i, received;
 	for (i = 0, received = 0; i < nb_pkts;
-			i += RTE_I40E_DESCS_PER_LOOP_AVX,
-			rxdp += RTE_I40E_DESCS_PER_LOOP_AVX) {
+			i += I40E_VPMD_DESCS_PER_LOOP_WIDE,
+			rxdp += I40E_VPMD_DESCS_PER_LOOP_WIDE) {
 		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
 		_mm256_storeu_si256((void *)&rx_pkts[i],
 				_mm256_loadu_si256((void *)&sw_ring[i]));
@@ -299,7 +297,7 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 		if (split_packet) {
 			int j;
-			for (j = 0; j < RTE_I40E_DESCS_PER_LOOP_AVX; j++)
+			for (j = 0; j < I40E_VPMD_DESCS_PER_LOOP_WIDE; j++)
 				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
 		}
 
@@ -577,7 +575,7 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 					12, 4, 14, 6);
 			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
 			*(uint64_t *)split_packet = _mm_cvtsi128_si64(split_bits);
-			split_packet += RTE_I40E_DESCS_PER_LOOP_AVX;
+			split_packet += I40E_VPMD_DESCS_PER_LOOP_WIDE;
 		}
 
 		/* perform dd_check */
@@ -590,7 +588,7 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		burst += rte_popcount64(_mm_cvtsi128_si64(
 				_mm256_castsi256_si128(status0_7)));
 		received += burst;
-		if (burst != RTE_I40E_DESCS_PER_LOOP_AVX)
+		if (burst != I40E_VPMD_DESCS_PER_LOOP_WIDE)
 			break;
 	}
 
@@ -607,7 +605,7 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 /*
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 i40e_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -619,14 +617,14 @@ i40e_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 /*
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 i40e_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts)
 {
 	struct i40e_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
+	uint8_t split_flags[I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _recv_raw_pkts_vec_avx2(rxq, rx_pkts, nb_pkts,
@@ -661,19 +659,19 @@ i40e_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
  * vPMD receive routine that reassembles scattered packets.
  * Main receive routine that can handle arbitrary burst sizes
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 i40e_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts)
 {
 	uint16_t retval = 0;
-	while (nb_pkts > RTE_I40E_VPMD_RX_BURST) {
+	while (nb_pkts > I40E_VPMD_RX_BURST) {
 		uint16_t burst = i40e_recv_scattered_burst_vec_avx2(rx_queue,
-				rx_pkts + retval, RTE_I40E_VPMD_RX_BURST);
+				rx_pkts + retval, I40E_VPMD_RX_BURST);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < RTE_I40E_VPMD_RX_BURST)
+		if (burst < I40E_VPMD_RX_BURST)
 			return retval;
 	}
 	return retval + i40e_recv_scattered_burst_vec_avx2(rx_queue,
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index bb25acf398..f0320a221c 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -15,8 +15,6 @@
 
 #include <rte_vect.h>
 
-#define RTE_I40E_DESCS_PER_LOOP_AVX 8
-
 static __rte_always_inline void
 i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 {
@@ -119,13 +117,13 @@ _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 	rte_prefetch0(rxdp);
 
-	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP_AVX */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP_AVX);
+	/* nb_pkts has to be floor-aligned to I40E_VPMD_DESCS_PER_LOOP_WIDE */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, I40E_VPMD_DESCS_PER_LOOP_WIDE);
 
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > I40E_VPMD_RXQ_REARM_THRESH)
 		i40e_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -245,8 +243,8 @@ _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	uint16_t i, received;
 
 	for (i = 0, received = 0; i < nb_pkts;
-			i += RTE_I40E_DESCS_PER_LOOP_AVX,
-			rxdp += RTE_I40E_DESCS_PER_LOOP_AVX) {
+			i += I40E_VPMD_DESCS_PER_LOOP_WIDE,
+			rxdp += I40E_VPMD_DESCS_PER_LOOP_WIDE) {
 		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
 		_mm256_storeu_si256((void *)&rx_pkts[i],
 				_mm256_loadu_si256((void *)&sw_ring[i]));
@@ -312,7 +310,7 @@ _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		if (split_packet) {
 			int j;
 
-			for (j = 0; j < RTE_I40E_DESCS_PER_LOOP_AVX; j++)
+			for (j = 0; j < I40E_VPMD_DESCS_PER_LOOP_WIDE; j++)
 				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
 		}
 
@@ -642,7 +640,7 @@ _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
 			*(uint64_t *)split_packet =
 				_mm_cvtsi128_si64(split_bits);
-			split_packet += RTE_I40E_DESCS_PER_LOOP_AVX;
+			split_packet += I40E_VPMD_DESCS_PER_LOOP_WIDE;
 		}
 
 		/* perform dd_check */
@@ -657,7 +655,7 @@ _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		burst += rte_popcount64(_mm_cvtsi128_si64
 				(_mm256_castsi256_si128(status0_7)));
 		received += burst;
-		if (burst != RTE_I40E_DESCS_PER_LOOP_AVX)
+		if (burst != I40E_VPMD_DESCS_PER_LOOP_WIDE)
 			break;
 	}
 
@@ -674,7 +672,7 @@ _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 /**
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 i40e_recv_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -686,7 +684,7 @@ i40e_recv_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 /**
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 i40e_recv_scattered_burst_vec_avx512(void *rx_queue,
@@ -694,7 +692,7 @@ i40e_recv_scattered_burst_vec_avx512(void *rx_queue,
 				     uint16_t nb_pkts)
 {
 	struct i40e_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
+	uint8_t split_flags[I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _recv_raw_pkts_vec_avx512(rxq, rx_pkts, nb_pkts,
@@ -729,7 +727,7 @@ i40e_recv_scattered_burst_vec_avx512(void *rx_queue,
  * vPMD receive routine that reassembles scattered packets.
  * Main receive routine that can handle arbitrary burst sizes
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 i40e_recv_scattered_pkts_vec_avx512(void *rx_queue,
@@ -738,12 +736,12 @@ i40e_recv_scattered_pkts_vec_avx512(void *rx_queue,
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > RTE_I40E_VPMD_RX_BURST) {
+	while (nb_pkts > I40E_VPMD_RX_BURST) {
 		uint16_t burst = i40e_recv_scattered_burst_vec_avx512(rx_queue,
-				rx_pkts + retval, RTE_I40E_VPMD_RX_BURST);
+				rx_pkts + retval, I40E_VPMD_RX_BURST);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < RTE_I40E_VPMD_RX_BURST)
+		if (burst < I40E_VPMD_RX_BURST)
 			return retval;
 	}
 	return retval + i40e_recv_scattered_burst_vec_avx512(rx_queue,
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index 695b4e1040..955382652c 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -33,21 +33,21 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	/* Pull 'n' more MBUFs into the software ring */
 	if (unlikely(rte_mempool_get_bulk(rxq->mp,
 					  (void *)rxep,
-					  RTE_I40E_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+					  I40E_VPMD_RXQ_REARM_THRESH) < 0)) {
+		if (rxq->rxrearm_nb + I40E_VPMD_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+			for (i = 0; i < I40E_VPMD_DESCS_PER_LOOP; i++) {
 				rxep[i].mbuf = &rxq->fake_mbuf;
 				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read), zero);
 			}
 		}
 		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
+			I40E_VPMD_RXQ_REARM_THRESH;
 		return;
 	}
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+	for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		mb0 = rxep[0].mbuf;
 		mb1 = rxep[1].mbuf;
 
@@ -62,7 +62,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
 	}
 
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += I40E_VPMD_RXQ_REARM_THRESH;
 	rx_id = rxq->rxrearm_start - 1;
 
 	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
@@ -70,7 +70,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 		rx_id = rxq->nb_rx_desc - 1;
 	}
 
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= I40E_VPMD_RXQ_REARM_THRESH;
 
 	rte_io_wmb();
 	/* Update the tail pointer on the NIC */
@@ -325,11 +325,11 @@ desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **__rte_restrict rx_pkts,
 }
 
 /**
- * vPMD raw receive routine, only accept(nb_pkts >= RTE_I40E_DESCS_PER_LOOP)
+ * vPMD raw receive routine, only accept(nb_pkts >= I40E_VPMD_DESCS_PER_LOOP)
  *
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
- * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
+ * - floor align nb_pkts to a I40E_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
 _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
@@ -368,8 +368,8 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 		0, 0, 0       /* ignore non-length fields */
 		};
 
-	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP);
+	/* nb_pkts has to be floor-aligned to I40E_VPMD_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, I40E_VPMD_DESCS_PER_LOOP);
 
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
@@ -381,7 +381,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > I40E_VPMD_RXQ_REARM_THRESH)
 		i40e_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -405,9 +405,9 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 	 */
 
 	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
-			pos += RTE_I40E_DESCS_PER_LOOP,
-			rxdp += RTE_I40E_DESCS_PER_LOOP) {
-		uint64x2_t descs[RTE_I40E_DESCS_PER_LOOP];
+			pos += I40E_VPMD_DESCS_PER_LOOP,
+			rxdp += I40E_VPMD_DESCS_PER_LOOP) {
+		uint64x2_t descs[I40E_VPMD_DESCS_PER_LOOP];
 		uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
 		uint16x8x2_t sterr_tmp1, sterr_tmp2;
 		uint64x2_t mbp1, mbp2;
@@ -502,9 +502,8 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 
 		desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
 
-		if (likely(pos + RTE_I40E_DESCS_PER_LOOP < nb_pkts)) {
-			rte_prefetch_non_temporal(rxdp + RTE_I40E_DESCS_PER_LOOP);
-		}
+		if (likely(pos + I40E_VPMD_DESCS_PER_LOOP < nb_pkts))
+			rte_prefetch_non_temporal(rxdp + I40E_VPMD_DESCS_PER_LOOP);
 
 		/* C.1 4=>2 filter staterr info only */
 		sterr_tmp2 = vzipq_u16(vreinterpretq_u16_u64(descs[1]),
@@ -538,7 +537,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 			/* store the resulting 32-bit value */
 			vst1q_lane_u32((uint32_t *)split_packet,
 				       vreinterpretq_u32_u8(eop_bits), 0);
-			split_packet += RTE_I40E_DESCS_PER_LOOP;
+			split_packet += I40E_VPMD_DESCS_PER_LOOP;
 
 			/* zero-out next pointers */
 			rx_pkts[pos]->next = NULL;
@@ -555,7 +554,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 
 		/* C.4 calc available number of desc */
 		if (unlikely(stat == 0)) {
-			nb_pkts_recd += RTE_I40E_DESCS_PER_LOOP;
+			nb_pkts_recd += I40E_VPMD_DESCS_PER_LOOP;
 		} else {
 			nb_pkts_recd += rte_ctz64(stat) / I40E_UINT16_BIT;
 			break;
@@ -572,7 +571,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 
  /*
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 i40e_recv_pkts_vec(void *__rte_restrict rx_queue,
@@ -585,7 +584,7 @@ i40e_recv_pkts_vec(void *__rte_restrict rx_queue,
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  *
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -593,7 +592,7 @@ i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 {
 
 	struct i40e_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
+	uint8_t split_flags[I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
@@ -633,15 +632,15 @@ i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > RTE_I40E_VPMD_RX_BURST) {
+	while (nb_pkts > I40E_VPMD_RX_BURST) {
 		uint16_t burst;
 
 		burst = i40e_recv_scattered_burst_vec(rx_queue,
 						      rx_pkts + retval,
-						      RTE_I40E_VPMD_RX_BURST);
+						      I40E_VPMD_RX_BURST);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < RTE_I40E_VPMD_RX_BURST)
+		if (burst < I40E_VPMD_RX_BURST)
 			return retval;
 	}
 
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 920089fe3e..7e7f4c0895 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -31,23 +31,23 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
 				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+				 I40E_VPMD_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + I40E_VPMD_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
 			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+			for (i = 0; i < I40E_VPMD_DESCS_PER_LOOP; i++) {
 				rxep[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
 		}
 		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
+			I40E_VPMD_RXQ_REARM_THRESH;
 		return;
 	}
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+	for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		__m128i vaddr0, vaddr1;
 
 		mb0 = rxep[0].mbuf;
@@ -72,7 +72,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
 	}
 
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += I40E_VPMD_RXQ_REARM_THRESH;
 	rx_id = rxq->rxrearm_start - 1;
 
 	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
@@ -80,7 +80,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 		rx_id = rxq->nb_rx_desc - 1;
 	}
 
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= I40E_VPMD_RXQ_REARM_THRESH;
 
 	/* Update the tail pointer on the NIC */
 	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
@@ -340,11 +340,11 @@ desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
 }
 
 /**
- * vPMD raw receive routine, only accept(nb_pkts >= RTE_I40E_DESCS_PER_LOOP)
+ * vPMD raw receive routine, only accept(nb_pkts >= I40E_VPMD_DESCS_PER_LOOP)
  *
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
- * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
+ * - floor align nb_pkts to a I40E_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
 _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -376,8 +376,8 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
 	__m128i dd_check, eop_check;
 
-	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP);
+	/* nb_pkts has to be floor-aligned to I40E_VPMD_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, I40E_VPMD_DESCS_PER_LOOP);
 
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
@@ -389,7 +389,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > I40E_VPMD_RXQ_REARM_THRESH)
 		i40e_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -443,9 +443,9 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	 */
 
 	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
-			pos += RTE_I40E_DESCS_PER_LOOP,
-			rxdp += RTE_I40E_DESCS_PER_LOOP) {
-		__m128i descs[RTE_I40E_DESCS_PER_LOOP];
+			pos += I40E_VPMD_DESCS_PER_LOOP,
+			rxdp += I40E_VPMD_DESCS_PER_LOOP) {
+		__m128i descs[I40E_VPMD_DESCS_PER_LOOP];
 		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
 		__m128i zero, staterr, sterr_tmp1, sterr_tmp2;
 		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
@@ -559,7 +559,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
 			/* store the resulting 32-bit value */
 			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
-			split_packet += RTE_I40E_DESCS_PER_LOOP;
+			split_packet += I40E_VPMD_DESCS_PER_LOOP;
 		}
 
 		/* C.3 calc available number of desc */
@@ -575,7 +575,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		/* C.4 calc available number of desc */
 		var = rte_popcount64(_mm_cvtsi128_si64(staterr));
 		nb_pkts_recd += var;
-		if (likely(var != RTE_I40E_DESCS_PER_LOOP))
+		if (likely(var != I40E_VPMD_DESCS_PER_LOOP))
 			break;
 	}
 
@@ -589,7 +589,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
  /*
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -602,7 +602,7 @@ i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  *
  * Notice:
- * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < I40E_VPMD_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -610,7 +610,7 @@ i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 {
 
 	struct i40e_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
+	uint8_t split_flags[I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
@@ -650,15 +650,15 @@ i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > RTE_I40E_VPMD_RX_BURST) {
+	while (nb_pkts > I40E_VPMD_RX_BURST) {
 		uint16_t burst;
 
 		burst = i40e_recv_scattered_burst_vec(rx_queue,
 						      rx_pkts + retval,
-						      RTE_I40E_VPMD_RX_BURST);
+						      I40E_VPMD_RX_BURST);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < RTE_I40E_VPMD_RX_BURST)
+		if (burst < I40E_VPMD_RX_BURST)
 			return retval;
 	}
 
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 13/25] net/ice: clean up definitions
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (11 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 12/25] net/i40e: " Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 14/25] net/iavf: " Anatoly Burakov
                     ` (11 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Bruce Richardson

This commit does the following cleanups:

- Mark vector-PMD-related definitions with a special naming convention
- Remove unused definitions
- Create separate "descriptors per loop" definitions for the different vector
  implementations (regular for SSE and Neon, wide for AVX2 and AVX512), as
  illustrated by the sketch below
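
A minimal standalone sketch of how the two per-loop widths affect burst
handling; the constant values and the ALIGN_FLOOR helper are local stand-ins
for the driver definitions and RTE_ALIGN_FLOOR, used only for illustration.

    #include <stdint.h>
    #include <stdio.h>

    /* Local stand-ins for the renamed driver constants. */
    #define ICE_VPMD_DESCS_PER_LOOP       4   /* SSE / Neon path */
    #define ICE_VPMD_DESCS_PER_LOOP_WIDE  8   /* AVX2 / AVX512 path */

    /* Power-of-two floor alignment, standing in for RTE_ALIGN_FLOOR. */
    #define ALIGN_FLOOR(v, a) ((unsigned int)((v) & ~((a) - 1)))

    int main(void)
    {
        uint16_t nb_pkts = 37;

        /* Each vector Rx routine trims the burst to whole loop iterations. */
        printf("SSE/Neon:    %u packets\n",
               ALIGN_FLOOR(nb_pkts, ICE_VPMD_DESCS_PER_LOOP));      /* 36 */
        printf("AVX2/AVX512: %u packets\n",
               ALIGN_FLOOR(nb_pkts, ICE_VPMD_DESCS_PER_LOOP_WIDE)); /* 32 */
        return 0;
    }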

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Add this commit

 drivers/net/intel/ice/ice_rxtx.h            |  6 ++--
 drivers/net/intel/ice/ice_rxtx_common_avx.h | 18 +++++-----
 drivers/net/intel/ice/ice_rxtx_vec_avx2.c   | 24 ++++++-------
 drivers/net/intel/ice/ice_rxtx_vec_avx512.c | 30 ++++++++--------
 drivers/net/intel/ice/ice_rxtx_vec_sse.c    | 40 ++++++++++-----------
 5 files changed, 57 insertions(+), 61 deletions(-)

diff --git a/drivers/net/intel/ice/ice_rxtx.h b/drivers/net/intel/ice/ice_rxtx.h
index d2d521c4f5..52c753ba7c 100644
--- a/drivers/net/intel/ice/ice_rxtx.h
+++ b/drivers/net/intel/ice/ice_rxtx.h
@@ -35,10 +35,10 @@
 
 #define ICE_VPMD_RX_BURST           32
 #define ICE_VPMD_TX_BURST           32
-#define ICE_RXQ_REARM_THRESH        64
-#define ICE_MAX_RX_BURST            ICE_RXQ_REARM_THRESH
+#define ICE_VPMD_RXQ_REARM_THRESH   64
 #define ICE_TX_MAX_FREE_BUF_SZ      64
-#define ICE_DESCS_PER_LOOP          4
+#define ICE_VPMD_DESCS_PER_LOOP      4
+#define ICE_VPMD_DESCS_PER_LOOP_WIDE 8
 
 #define ICE_FDIR_PKT_LEN	512
 
diff --git a/drivers/net/intel/ice/ice_rxtx_common_avx.h b/drivers/net/intel/ice/ice_rxtx_common_avx.h
index a68cf8512d..d1c772bf06 100644
--- a/drivers/net/intel/ice/ice_rxtx_common_avx.h
+++ b/drivers/net/intel/ice/ice_rxtx_common_avx.h
@@ -21,20 +21,20 @@ ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
 				 (void *)rxep,
-				 ICE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
+				 ICE_VPMD_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + ICE_VPMD_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
 			__m128i dma_addr0;
 
 			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
+			for (i = 0; i < ICE_VPMD_DESCS_PER_LOOP; i++) {
 				rxep[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
 		}
 		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_RXQ_REARM_THRESH;
+			ICE_VPMD_RXQ_REARM_THRESH;
 		return;
 	}
 
@@ -44,7 +44,7 @@ ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+	for (i = 0; i < ICE_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		__m128i vaddr0, vaddr1;
 
 		mb0 = rxep[0].mbuf;
@@ -84,7 +84,7 @@ ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
 		__m512i dma_addr0_3, dma_addr4_7;
 		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
 		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < ICE_RXQ_REARM_THRESH;
+		for (i = 0; i < ICE_VPMD_RXQ_REARM_THRESH;
 				i += 8, rxep += 8, rxdp += 8) {
 			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
 			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
@@ -163,7 +163,7 @@ ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
 		__m256i dma_addr0_1, dma_addr2_3;
 		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
 		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < ICE_RXQ_REARM_THRESH;
+		for (i = 0; i < ICE_VPMD_RXQ_REARM_THRESH;
 				i += 4, rxep += 4, rxdp += 4) {
 			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
 			__m256i vaddr0_1, vaddr2_3;
@@ -216,11 +216,11 @@ ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
 
 #endif
 
-	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += ICE_VPMD_RXQ_REARM_THRESH;
 	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
 		rxq->rxrearm_start = 0;
 
-	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= ICE_VPMD_RXQ_REARM_THRESH;
 
 	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
index 6fe5ffa6f4..5ed669fc30 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
@@ -37,8 +37,6 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts, uint8_t *split_packet,
 			    bool offload)
 {
-#define ICE_DESCS_PER_LOOP_AVX 8
-
 	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
@@ -48,13 +46,13 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 	rte_prefetch0(rxdp);
 
-	/* nb_pkts has to be floor-aligned to ICE_DESCS_PER_LOOP_AVX */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, ICE_DESCS_PER_LOOP_AVX);
+	/* nb_pkts has to be floor-aligned to ICE_VPMD_DESCS_PER_LOOP_WIDE */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, ICE_VPMD_DESCS_PER_LOOP_WIDE);
 
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > ICE_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > ICE_VPMD_RXQ_REARM_THRESH)
 		ice_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -239,8 +237,8 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	uint16_t i, received;
 
 	for (i = 0, received = 0; i < nb_pkts;
-	     i += ICE_DESCS_PER_LOOP_AVX,
-	     rxdp += ICE_DESCS_PER_LOOP_AVX) {
+	     i += ICE_VPMD_DESCS_PER_LOOP_WIDE,
+	     rxdp += ICE_VPMD_DESCS_PER_LOOP_WIDE) {
 		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
 		_mm256_storeu_si256((void *)&rx_pkts[i],
 				    _mm256_loadu_si256((void *)&sw_ring[i]));
@@ -286,7 +284,7 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		if (split_packet) {
 			int j;
 
-			for (j = 0; j < ICE_DESCS_PER_LOOP_AVX; j++)
+			for (j = 0; j < ICE_VPMD_DESCS_PER_LOOP_WIDE; j++)
 				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
 		}
 
@@ -634,7 +632,7 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
 			*(uint64_t *)split_packet =
 				_mm_cvtsi128_si64(split_bits);
-			split_packet += ICE_DESCS_PER_LOOP_AVX;
+			split_packet += ICE_VPMD_DESCS_PER_LOOP_WIDE;
 		}
 
 		/* perform dd_check */
@@ -650,7 +648,7 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 				(_mm_cvtsi128_si64
 					(_mm256_castsi256_si128(status0_7)));
 		received += burst;
-		if (burst != ICE_DESCS_PER_LOOP_AVX)
+		if (burst != ICE_VPMD_DESCS_PER_LOOP_WIDE)
 			break;
 	}
 
@@ -667,7 +665,7 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 /**
  * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < ICE_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -688,7 +686,7 @@ ice_recv_pkts_vec_avx2_offload(void *rx_queue, struct rte_mbuf **rx_pkts,
 /**
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < ICE_VPMD_DESCS_PER_LOOP, just return no packet
  */
 static __rte_always_inline uint16_t
 ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -730,7 +728,7 @@ ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
  * vPMD receive routine that reassembles scattered packets.
  * Main receive routine that can handle arbitrary burst sizes
  * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < ICE_VPMD_DESCS_PER_LOOP, just return no packet
  */
 static __rte_always_inline uint16_t
 ice_recv_scattered_pkts_vec_avx2_common(void *rx_queue,
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
index 490d1ae059..e52e9e9ceb 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
@@ -7,8 +7,6 @@
 
 #include <rte_vect.h>
 
-#define ICE_DESCS_PER_LOOP_AVX 8
-
 static __rte_always_inline void
 ice_rxq_rearm(struct ice_rx_queue *rxq)
 {
@@ -49,13 +47,13 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
 
 	rte_prefetch0(rxdp);
 
-	/* nb_pkts has to be floor-aligned to ICE_DESCS_PER_LOOP_AVX */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, ICE_DESCS_PER_LOOP_AVX);
+	/* nb_pkts has to be floor-aligned to ICE_VPMD_DESCS_PER_LOOP_WIDE */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, ICE_VPMD_DESCS_PER_LOOP_WIDE);
 
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > ICE_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > ICE_VPMD_RXQ_REARM_THRESH)
 		ice_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -224,8 +222,8 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
 	uint16_t i, received;
 
 	for (i = 0, received = 0; i < nb_pkts;
-	     i += ICE_DESCS_PER_LOOP_AVX,
-	     rxdp += ICE_DESCS_PER_LOOP_AVX) {
+	     i += ICE_VPMD_DESCS_PER_LOOP_WIDE,
+	     rxdp += ICE_VPMD_DESCS_PER_LOOP_WIDE) {
 		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
 		_mm256_storeu_si256((void *)&rx_pkts[i],
 				    _mm256_loadu_si256((void *)&sw_ring[i]));
@@ -292,7 +290,7 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
 		if (split_packet) {
 			int j;
 
-			for (j = 0; j < ICE_DESCS_PER_LOOP_AVX; j++)
+			for (j = 0; j < ICE_VPMD_DESCS_PER_LOOP_WIDE; j++)
 				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
 		}
 
@@ -660,7 +658,7 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
 			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
 			*(uint64_t *)split_packet =
 				_mm_cvtsi128_si64(split_bits);
-			split_packet += ICE_DESCS_PER_LOOP_AVX;
+			split_packet += ICE_VPMD_DESCS_PER_LOOP_WIDE;
 		}
 
 		/* perform dd_check */
@@ -676,7 +674,7 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
 				(_mm_cvtsi128_si64
 					(_mm256_castsi256_si128(status0_7)));
 		received += burst;
-		if (burst != ICE_DESCS_PER_LOOP_AVX)
+		if (burst != ICE_VPMD_DESCS_PER_LOOP_WIDE)
 			break;
 	}
 
@@ -693,7 +691,7 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
 
 /**
  * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < ICE_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 ice_recv_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -704,7 +702,7 @@ ice_recv_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 /**
  * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < ICE_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 ice_recv_pkts_vec_avx512_offload(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -717,7 +715,7 @@ ice_recv_pkts_vec_avx512_offload(void *rx_queue, struct rte_mbuf **rx_pkts,
 /**
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < ICE_VPMD_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 ice_recv_scattered_burst_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -758,7 +756,7 @@ ice_recv_scattered_burst_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 /**
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < ICE_VPMD_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 ice_recv_scattered_burst_vec_avx512_offload(void *rx_queue,
@@ -801,7 +799,7 @@ ice_recv_scattered_burst_vec_avx512_offload(void *rx_queue,
  * vPMD receive routine that reassembles scattered packets.
  * Main receive routine that can handle arbitrary burst sizes
  * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < ICE_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 ice_recv_scattered_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -825,7 +823,7 @@ ice_recv_scattered_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
  * vPMD receive routine that reassembles scattered packets.
  * Main receive routine that can handle arbitrary burst sizes
  * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < ICE_VPMD_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 ice_recv_scattered_pkts_vec_avx512_offload(void *rx_queue,
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_sse.c b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
index 719b37645e..36da5b5d1b 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
@@ -42,23 +42,23 @@ ice_rxq_rearm(struct ice_rx_queue *rxq)
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
 				 (void *)rxep,
-				 ICE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
+				 ICE_VPMD_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + ICE_VPMD_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
 			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
+			for (i = 0; i < ICE_VPMD_DESCS_PER_LOOP; i++) {
 				rxep[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
 		}
 		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_RXQ_REARM_THRESH;
+			ICE_VPMD_RXQ_REARM_THRESH;
 		return;
 	}
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+	for (i = 0; i < ICE_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		__m128i vaddr0, vaddr1;
 
 		mb0 = rxep[0].mbuf;
@@ -91,11 +91,11 @@ ice_rxq_rearm(struct ice_rx_queue *rxq)
 		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
 	}
 
-	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += ICE_VPMD_RXQ_REARM_THRESH;
 	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
 		rxq->rxrearm_start = 0;
 
-	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= ICE_VPMD_RXQ_REARM_THRESH;
 
 	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
 			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
@@ -294,11 +294,11 @@ ice_rx_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
 }
 
 /**
- * vPMD raw receive routine, only accept(nb_pkts >= ICE_DESCS_PER_LOOP)
+ * vPMD raw receive routine, only accept(nb_pkts >= ICE_VPMD_DESCS_PER_LOOP)
  *
  * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - floor align nb_pkts to a ICE_DESCS_PER_LOOP power-of-two
+ * - nb_pkts < ICE_VPMD_DESCS_PER_LOOP, just return no packet
+ * - floor align nb_pkts to a ICE_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
 _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -355,8 +355,8 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
 						 0x0000000200000002LL);
 
-	/* nb_pkts has to be floor-aligned to ICE_DESCS_PER_LOOP */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, ICE_DESCS_PER_LOOP);
+	/* nb_pkts has to be floor-aligned to ICE_VPMD_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, ICE_VPMD_DESCS_PER_LOOP);
 
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
@@ -368,7 +368,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > ICE_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > ICE_VPMD_RXQ_REARM_THRESH)
 		ice_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -406,9 +406,9 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	 */
 
 	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
-	     pos += ICE_DESCS_PER_LOOP,
-	     rxdp += ICE_DESCS_PER_LOOP) {
-		__m128i descs[ICE_DESCS_PER_LOOP];
+	     pos += ICE_VPMD_DESCS_PER_LOOP,
+	     rxdp += ICE_VPMD_DESCS_PER_LOOP) {
+		__m128i descs[ICE_VPMD_DESCS_PER_LOOP];
 		__m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
 		__m128i staterr, sterr_tmp1, sterr_tmp2;
 		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
@@ -556,7 +556,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
 			/* store the resulting 32-bit value */
 			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
-			split_packet += ICE_DESCS_PER_LOOP;
+			split_packet += ICE_VPMD_DESCS_PER_LOOP;
 		}
 
 		/* C.3 calc available number of desc */
@@ -573,7 +573,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		/* C.4 calc available number of desc */
 		var = rte_popcount64(_mm_cvtsi128_si64(staterr));
 		nb_pkts_recd += var;
-		if (likely(var != ICE_DESCS_PER_LOOP))
+		if (likely(var != ICE_VPMD_DESCS_PER_LOOP))
 			break;
 	}
 
@@ -587,7 +587,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 /**
  * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < ICE_VPMD_DESCS_PER_LOOP, just return no packet
  * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
  *   numbers of DD bits
  */
@@ -602,7 +602,7 @@ ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  *
  * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts < ICE_VPMD_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 ice_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 14/25] net/iavf: clean up definitions
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (12 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 13/25] net/ice: " Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 15/25] net/ixgbe: create common Rx queue structure Anatoly Burakov
                     ` (10 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin, Ian Stokes; +Cc: bruce.richardson

This commit does the following cleanups:

- Mark vector-PMD-related definitions with a special naming convention
- Create separate "descriptors per loop" definitions for the different vector
  implementations (regular for SSE, Neon and AltiVec, wide for AVX2 and AVX512)
- Rename definitions to match the naming conventions used in the other
  drivers, as shown in the sketch below
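
A minimal standalone sketch of the burst-splitting pattern that consumes the
renamed IAVF_VPMD_RX_BURST definition; recv_burst() is a placeholder for the
vector receive routine and is assumed to always fill the requested burst.

    #include <stdint.h>
    #include <stdio.h>

    /* Local stand-in for the renamed burst-size definition. */
    #define IAVF_VPMD_RX_BURST 32

    /* Placeholder for the vector Rx routine; assume it fills every burst. */
    static uint16_t recv_burst(uint16_t nb_pkts)
    {
        return nb_pkts;
    }

    int main(void)
    {
        uint16_t nb_pkts = 100, retval = 0;

        /* Split a large request into IAVF_VPMD_RX_BURST-sized chunks, as the
         * scattered Rx wrappers in the diff below do.
         */
        while (nb_pkts > IAVF_VPMD_RX_BURST) {
            uint16_t burst = recv_burst(IAVF_VPMD_RX_BURST);

            retval += burst;
            nb_pkts -= burst;
            if (burst < IAVF_VPMD_RX_BURST)
                break;
        }
        retval += recv_burst(nb_pkts);
        printf("received %u packets\n", (unsigned int)retval); /* 100 */
        return 0;
    }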

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Add this commit

 drivers/net/intel/iavf/iavf_rxtx.c            |  2 +-
 drivers/net/intel/iavf/iavf_rxtx.h            | 11 ++--
 drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   | 52 +++++++++----------
 drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c | 49 +++++++++--------
 drivers/net/intel/iavf/iavf_rxtx_vec_common.h | 20 +++----
 drivers/net/intel/iavf/iavf_rxtx_vec_neon.c   | 14 ++---
 drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    | 20 +++----
 7 files changed, 82 insertions(+), 86 deletions(-)

diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index fd6c7d3a3e..2aed22800e 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -212,7 +212,7 @@ static inline bool
 check_tx_vec_allow(struct ci_tx_queue *txq)
 {
 	if (!(txq->offloads & IAVF_TX_NO_VECTOR_FLAGS) &&
-	    txq->tx_rs_thresh >= IAVF_VPMD_TX_MAX_BURST &&
+	    txq->tx_rs_thresh >= IAVF_VPMD_TX_BURST &&
 	    txq->tx_rs_thresh <= IAVF_VPMD_TX_MAX_FREE_BUF) {
 		PMD_INIT_LOG(DEBUG, "Vector tx can be enabled on this txq.");
 		return true;
diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index 6198643605..8c0bb5475d 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -23,11 +23,12 @@
 #define IAVF_RX_MAX_DATA_BUF_SIZE (16 * 1024 - 128)
 
 /* used for Vector PMD */
-#define IAVF_VPMD_RX_MAX_BURST    32
-#define IAVF_VPMD_TX_MAX_BURST    32
-#define IAVF_RXQ_REARM_THRESH     32
-#define IAVF_VPMD_DESCS_PER_LOOP  4
-#define IAVF_VPMD_TX_MAX_FREE_BUF 64
+#define IAVF_VPMD_RX_BURST             32
+#define IAVF_VPMD_TX_BURST             32
+#define IAVF_VPMD_RXQ_REARM_THRESH     32
+#define IAVF_VPMD_DESCS_PER_LOOP       4
+#define IAVF_VPMD_DESCS_PER_LOOP_WIDE  8
+#define IAVF_VPMD_TX_MAX_FREE_BUF      64
 
 #define IAVF_TX_NO_VECTOR_FLAGS (				 \
 		RTE_ETH_TX_OFFLOAD_VLAN_INSERT |		 \
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
index d94a8b0ae1..40b265183f 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
@@ -20,8 +20,6 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 			     uint16_t nb_pkts, uint8_t *split_packet,
 			     bool offload)
 {
-#define IAVF_DESCS_PER_LOOP_AVX 8
-
 	/* const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; */
 	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
 
@@ -34,13 +32,13 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 
 	rte_prefetch0(rxdp);
 
-	/* nb_pkts has to be floor-aligned to IAVF_DESCS_PER_LOOP_AVX */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_DESCS_PER_LOOP_AVX);
+	/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP_WIDE */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP_WIDE);
 
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > IAVF_VPMD_RXQ_REARM_THRESH)
 		iavf_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -178,8 +176,8 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	uint16_t i, received;
 
 	for (i = 0, received = 0; i < nb_pkts;
-	     i += IAVF_DESCS_PER_LOOP_AVX,
-	     rxdp += IAVF_DESCS_PER_LOOP_AVX) {
+	     i += IAVF_VPMD_DESCS_PER_LOOP_WIDE,
+	     rxdp += IAVF_VPMD_DESCS_PER_LOOP_WIDE) {
 		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
 		_mm256_storeu_si256((void *)&rx_pkts[i],
 				    _mm256_loadu_si256((void *)&sw_ring[i]));
@@ -217,7 +215,7 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 		if (split_packet) {
 			int j;
 
-			for (j = 0; j < IAVF_DESCS_PER_LOOP_AVX; j++)
+			for (j = 0; j < IAVF_VPMD_DESCS_PER_LOOP_WIDE; j++)
 				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
 		}
 
@@ -436,7 +434,7 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
 			*(uint64_t *)split_packet =
 				_mm_cvtsi128_si64(split_bits);
-			split_packet += IAVF_DESCS_PER_LOOP_AVX;
+			split_packet += IAVF_VPMD_DESCS_PER_LOOP_WIDE;
 		}
 
 		/* perform dd_check */
@@ -452,7 +450,7 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 				(_mm_cvtsi128_si64
 					(_mm256_castsi256_si128(status0_7)));
 		received += burst;
-		if (burst != IAVF_DESCS_PER_LOOP_AVX)
+		if (burst != IAVF_VPMD_DESCS_PER_LOOP_WIDE)
 			break;
 	}
 
@@ -492,8 +490,6 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 				      uint16_t nb_pkts, uint8_t *split_packet,
 				      bool offload)
 {
-#define IAVF_DESCS_PER_LOOP_AVX 8
-
 	struct iavf_adapter *adapter = rxq->vsi->adapter;
 
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
@@ -509,13 +505,13 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 
 	rte_prefetch0(rxdp);
 
-	/* nb_pkts has to be floor-aligned to IAVF_DESCS_PER_LOOP_AVX */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_DESCS_PER_LOOP_AVX);
+	/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP_WIDE */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP_WIDE);
 
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > IAVF_VPMD_RXQ_REARM_THRESH)
 		iavf_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -725,8 +721,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 	uint16_t i, received;
 
 	for (i = 0, received = 0; i < nb_pkts;
-	     i += IAVF_DESCS_PER_LOOP_AVX,
-	     rxdp += IAVF_DESCS_PER_LOOP_AVX) {
+	     i += IAVF_VPMD_DESCS_PER_LOOP_WIDE,
+	     rxdp += IAVF_VPMD_DESCS_PER_LOOP_WIDE) {
 		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
 		_mm256_storeu_si256((void *)&rx_pkts[i],
 				    _mm256_loadu_si256((void *)&sw_ring[i]));
@@ -782,7 +778,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 		if (split_packet) {
 			int j;
 
-			for (j = 0; j < IAVF_DESCS_PER_LOOP_AVX; j++)
+			for (j = 0; j < IAVF_VPMD_DESCS_PER_LOOP_WIDE; j++)
 				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
 		}
 
@@ -1344,7 +1340,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
 			*(uint64_t *)split_packet =
 				_mm_cvtsi128_si64(split_bits);
-			split_packet += IAVF_DESCS_PER_LOOP_AVX;
+			split_packet += IAVF_VPMD_DESCS_PER_LOOP_WIDE;
 		}
 
 		/* perform dd_check */
@@ -1407,7 +1403,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 			rxq->hw_time_update = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
 		}
 #endif
-		if (burst != IAVF_DESCS_PER_LOOP_AVX)
+		if (burst != IAVF_VPMD_DESCS_PER_LOOP_WIDE)
 			break;
 	}
 
@@ -1477,7 +1473,7 @@ iavf_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				   uint16_t nb_pkts, bool offload)
 {
 	struct iavf_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _iavf_recv_raw_pkts_vec_avx2(rxq, rx_pkts, nb_pkts,
@@ -1520,12 +1516,12 @@ iavf_recv_scattered_pkts_vec_avx2_common(void *rx_queue, struct rte_mbuf **rx_pk
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+	while (nb_pkts > IAVF_VPMD_RX_BURST) {
 		uint16_t burst = iavf_recv_scattered_burst_vec_avx2(rx_queue,
-				rx_pkts + retval, IAVF_VPMD_RX_MAX_BURST, offload);
+				rx_pkts + retval, IAVF_VPMD_RX_BURST, offload);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < IAVF_VPMD_RX_MAX_BURST)
+		if (burst < IAVF_VPMD_RX_BURST)
 			return retval;
 	}
 	return retval + iavf_recv_scattered_burst_vec_avx2(rx_queue,
@@ -1566,7 +1562,7 @@ iavf_recv_scattered_burst_vec_avx2_flex_rxd(void *rx_queue,
 					    uint16_t nb_pkts, bool offload)
 {
 	struct iavf_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _iavf_recv_raw_pkts_vec_avx2_flex_rxd(rxq,
@@ -1610,14 +1606,14 @@ iavf_recv_scattered_pkts_vec_avx2_flex_rxd_common(void *rx_queue,
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+	while (nb_pkts > IAVF_VPMD_RX_BURST) {
 		uint16_t burst =
 			iavf_recv_scattered_burst_vec_avx2_flex_rxd
-			(rx_queue, rx_pkts + retval, IAVF_VPMD_RX_MAX_BURST,
+			(rx_queue, rx_pkts + retval, IAVF_VPMD_RX_BURST,
 			 offload);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < IAVF_VPMD_RX_MAX_BURST)
+		if (burst < IAVF_VPMD_RX_BURST)
 			return retval;
 	}
 	return retval + iavf_recv_scattered_burst_vec_avx2_flex_rxd(rx_queue,
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
index 895b8717f7..53bc69ecf6 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
@@ -6,7 +6,6 @@
 
 #include <rte_vect.h>
 
-#define IAVF_DESCS_PER_LOOP_AVX 8
 #define PKTLEN_SHIFT 10
 
 /******************************************************************************
@@ -51,13 +50,13 @@ _iavf_recv_raw_pkts_vec_avx512(struct iavf_rx_queue *rxq,
 
 	rte_prefetch0(rxdp);
 
-	/* nb_pkts has to be floor-aligned to IAVF_DESCS_PER_LOOP_AVX */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_DESCS_PER_LOOP_AVX);
+	/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP_WIDE */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP_WIDE);
 
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > IAVF_VPMD_RXQ_REARM_THRESH)
 		iavf_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -148,8 +147,8 @@ _iavf_recv_raw_pkts_vec_avx512(struct iavf_rx_queue *rxq,
 	uint16_t i, received;
 
 	for (i = 0, received = 0; i < nb_pkts;
-	     i += IAVF_DESCS_PER_LOOP_AVX,
-	     rxdp += IAVF_DESCS_PER_LOOP_AVX) {
+	     i += IAVF_VPMD_DESCS_PER_LOOP_WIDE,
+	     rxdp += IAVF_VPMD_DESCS_PER_LOOP_WIDE) {
 		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
 		_mm256_storeu_si256((void *)&rx_pkts[i],
 				    _mm256_loadu_si256((void *)&sw_ring[i]));
@@ -196,7 +195,7 @@ _iavf_recv_raw_pkts_vec_avx512(struct iavf_rx_queue *rxq,
 		if (split_packet) {
 			int j;
 
-			for (j = 0; j < IAVF_DESCS_PER_LOOP_AVX; j++)
+			for (j = 0; j < IAVF_VPMD_DESCS_PER_LOOP_WIDE; j++)
 				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
 		}
 
@@ -527,7 +526,7 @@ _iavf_recv_raw_pkts_vec_avx512(struct iavf_rx_queue *rxq,
 			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
 			*(uint64_t *)split_packet =
 				_mm_cvtsi128_si64(split_bits);
-			split_packet += IAVF_DESCS_PER_LOOP_AVX;
+			split_packet += IAVF_VPMD_DESCS_PER_LOOP_WIDE;
 		}
 
 		/* perform dd_check */
@@ -543,7 +542,7 @@ _iavf_recv_raw_pkts_vec_avx512(struct iavf_rx_queue *rxq,
 				(_mm_cvtsi128_si64
 					(_mm256_castsi256_si128(status0_7)));
 		received += burst;
-		if (burst != IAVF_DESCS_PER_LOOP_AVX)
+		if (burst != IAVF_VPMD_DESCS_PER_LOOP_WIDE)
 			break;
 	}
 
@@ -600,13 +599,13 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 
 	rte_prefetch0(rxdp);
 
-	/* nb_pkts has to be floor-aligned to IAVF_DESCS_PER_LOOP_AVX */
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_DESCS_PER_LOOP_AVX);
+	/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP_WIDE */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP_WIDE);
 
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > IAVF_VPMD_RXQ_REARM_THRESH)
 		iavf_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
@@ -716,8 +715,8 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 	uint16_t i, received;
 
 	for (i = 0, received = 0; i < nb_pkts;
-	     i += IAVF_DESCS_PER_LOOP_AVX,
-	     rxdp += IAVF_DESCS_PER_LOOP_AVX) {
+	     i += IAVF_VPMD_DESCS_PER_LOOP_WIDE,
+	     rxdp += IAVF_VPMD_DESCS_PER_LOOP_WIDE) {
 		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
 		_mm256_storeu_si256((void *)&rx_pkts[i],
 				    _mm256_loadu_si256((void *)&sw_ring[i]));
@@ -765,7 +764,7 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 		if (split_packet) {
 			int j;
 
-			for (j = 0; j < IAVF_DESCS_PER_LOOP_AVX; j++)
+			for (j = 0; j < IAVF_VPMD_DESCS_PER_LOOP_WIDE; j++)
 				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
 		}
 
@@ -1532,7 +1531,7 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
 			*(uint64_t *)split_packet =
 				_mm_cvtsi128_si64(split_bits);
-			split_packet += IAVF_DESCS_PER_LOOP_AVX;
+			split_packet += IAVF_VPMD_DESCS_PER_LOOP_WIDE;
 		}
 
 		/* perform dd_check */
@@ -1597,7 +1596,7 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 		}
 #endif
 #endif
-		if (burst != IAVF_DESCS_PER_LOOP_AVX)
+		if (burst != IAVF_VPMD_DESCS_PER_LOOP_WIDE)
 			break;
 	}
 
@@ -1654,7 +1653,7 @@ iavf_recv_scattered_burst_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts, bool offload)
 {
 	struct iavf_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _iavf_recv_raw_pkts_vec_avx512(rxq, rx_pkts, nb_pkts,
@@ -1697,12 +1696,12 @@ iavf_recv_scattered_pkts_vec_avx512_cmn(void *rx_queue, struct rte_mbuf **rx_pkt
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+	while (nb_pkts > IAVF_VPMD_RX_BURST) {
 		uint16_t burst = iavf_recv_scattered_burst_vec_avx512(rx_queue,
-				rx_pkts + retval, IAVF_VPMD_RX_MAX_BURST, offload);
+				rx_pkts + retval, IAVF_VPMD_RX_BURST, offload);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < IAVF_VPMD_RX_MAX_BURST)
+		if (burst < IAVF_VPMD_RX_BURST)
 			return retval;
 	}
 	return retval + iavf_recv_scattered_burst_vec_avx512(rx_queue,
@@ -1730,7 +1729,7 @@ iavf_recv_scattered_burst_vec_avx512_flex_rxd(void *rx_queue,
 					      bool offload)
 {
 	struct iavf_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
 	uint16_t nb_bufs = _iavf_recv_raw_pkts_vec_avx512_flex_rxd(rxq,
@@ -1775,14 +1774,14 @@ iavf_recv_scattered_pkts_vec_avx512_flex_rxd_cmn(void *rx_queue,
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+	while (nb_pkts > IAVF_VPMD_RX_BURST) {
 		uint16_t burst =
 			iavf_recv_scattered_burst_vec_avx512_flex_rxd
 				(rx_queue, rx_pkts + retval,
-				 IAVF_VPMD_RX_MAX_BURST, offload);
+				 IAVF_VPMD_RX_BURST, offload);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < IAVF_VPMD_RX_MAX_BURST)
+		if (burst < IAVF_VPMD_RX_BURST)
 			return retval;
 	}
 	return retval + iavf_recv_scattered_burst_vec_avx512_flex_rxd(rx_queue,
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
index f577fd7f3e..c78bebe9b4 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
@@ -59,7 +59,7 @@ iavf_rx_vec_queue_default(struct iavf_rx_queue *rxq)
 	if (!rte_is_power_of_2(rxq->nb_rx_desc))
 		return -1;
 
-	if (rxq->rx_free_thresh < IAVF_VPMD_RX_MAX_BURST)
+	if (rxq->rx_free_thresh < IAVF_VPMD_RX_BURST)
 		return -1;
 
 	if (rxq->nb_rx_desc % rxq->rx_free_thresh)
@@ -80,7 +80,7 @@ iavf_tx_vec_queue_default(struct ci_tx_queue *txq)
 	if (!txq)
 		return -1;
 
-	if (txq->tx_rs_thresh < IAVF_VPMD_TX_MAX_BURST ||
+	if (txq->tx_rs_thresh < IAVF_VPMD_TX_BURST ||
 	    txq->tx_rs_thresh > IAVF_VPMD_TX_MAX_FREE_BUF)
 		return -1;
 
@@ -252,8 +252,8 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
 				 (void *)rxp,
-				 IAVF_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
+				 IAVF_VPMD_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + IAVF_VPMD_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
 			__m128i dma_addr0;
 
@@ -265,7 +265,7 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 			}
 		}
 		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IAVF_RXQ_REARM_THRESH;
+			IAVF_VPMD_RXQ_REARM_THRESH;
 		return;
 	}
 
@@ -275,7 +275,7 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
+	for (i = 0; i < IAVF_VPMD_RXQ_REARM_THRESH; i += 2, rxp += 2) {
 		__m128i vaddr0, vaddr1;
 
 		mb0 = rxp[0];
@@ -307,7 +307,7 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 		__m512i dma_addr0_3, dma_addr4_7;
 		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
 		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
+		for (i = 0; i < IAVF_VPMD_RXQ_REARM_THRESH;
 				i += 8, rxp += 8, rxdp += 8) {
 			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
 			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
@@ -378,7 +378,7 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 		__m256i dma_addr0_1, dma_addr2_3;
 		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
 		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
+		for (i = 0; i < IAVF_VPMD_RXQ_REARM_THRESH;
 				i += 4, rxp += 4, rxdp += 4) {
 			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
 			__m256i vaddr0_1, vaddr2_3;
@@ -423,11 +423,11 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 
 #endif
 
-	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += IAVF_VPMD_RXQ_REARM_THRESH;
 	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
 		rxq->rxrearm_start = 0;
 
-	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= IAVF_VPMD_RXQ_REARM_THRESH;
 
 	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
index a583340f15..86f3a7839d 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
@@ -31,8 +31,8 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 	/* Pull 'n' more MBUFs into the software ring */
 	if (unlikely(rte_mempool_get_bulk(rxq->mp,
 					  (void *)rxep,
-					  IAVF_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
+					  IAVF_VPMD_RXQ_REARM_THRESH) < 0)) {
+		if (rxq->rxrearm_nb + IAVF_VPMD_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
 			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
 				rxep[i] = &rxq->fake_mbuf;
@@ -40,12 +40,12 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 			}
 		}
 		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IAVF_RXQ_REARM_THRESH;
+			IAVF_VPMD_RXQ_REARM_THRESH;
 		return;
 	}
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+	for (i = 0; i < IAVF_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		mb0 = rxep[0];
 		mb1 = rxep[1];
 
@@ -60,11 +60,11 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
 	}
 
-	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
+	rxq->rxrearm_start += IAVF_VPMD_RXQ_REARM_THRESH;
 	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
 		rxq->rxrearm_start = 0;
 
-	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
+	rxq->rxrearm_nb -= IAVF_VPMD_RXQ_REARM_THRESH;
 
 	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
@@ -233,7 +233,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *__rte_restrict rxq,
 	/* See if we need to rearm the RX queue - gives the prefetch a bit
 	 * of time to act
 	 */
-	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
+	if (rxq->rxrearm_nb > IAVF_VPMD_RXQ_REARM_THRESH)
 		iavf_rxq_rearm(rxq);
 
 	/* Before we start moving massive data around, check to see if
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
index 8ccdec7f8a..190c1dd869 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
@@ -1175,7 +1175,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 
 /* Notice:
  * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ * - nb_pkts > IAVF_VPMD_RX_BURST, only scan IAVF_VPMD_RX_BURST
  *   numbers of DD bits
  */
 uint16_t
@@ -1187,7 +1187,7 @@ iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 /* Notice:
  * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ * - nb_pkts > IAVF_VPMD_RX_BURST, only scan IAVF_VPMD_RX_BURST
  *   numbers of DD bits
  */
 uint16_t
@@ -1208,7 +1208,7 @@ iavf_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
 	struct iavf_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
 	unsigned int i = 0;
 
 	/* get some new buffers */
@@ -1247,15 +1247,15 @@ iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+	while (nb_pkts > IAVF_VPMD_RX_BURST) {
 		uint16_t burst;
 
 		burst = iavf_recv_scattered_burst_vec(rx_queue,
 						      rx_pkts + retval,
-						      IAVF_VPMD_RX_MAX_BURST);
+						      IAVF_VPMD_RX_BURST);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < IAVF_VPMD_RX_MAX_BURST)
+		if (burst < IAVF_VPMD_RX_BURST)
 			return retval;
 	}
 
@@ -1277,7 +1277,7 @@ iavf_recv_scattered_burst_vec_flex_rxd(void *rx_queue,
 				       uint16_t nb_pkts)
 {
 	struct iavf_rx_queue *rxq = rx_queue;
-	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
 	unsigned int i = 0;
 
 	/* get some new buffers */
@@ -1317,15 +1317,15 @@ iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
 {
 	uint16_t retval = 0;
 
-	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+	while (nb_pkts > IAVF_VPMD_RX_BURST) {
 		uint16_t burst;
 
 		burst = iavf_recv_scattered_burst_vec_flex_rxd(rx_queue,
 						rx_pkts + retval,
-						IAVF_VPMD_RX_MAX_BURST);
+						IAVF_VPMD_RX_BURST);
 		retval += burst;
 		nb_pkts -= burst;
-		if (burst < IAVF_VPMD_RX_MAX_BURST)
+		if (burst < IAVF_VPMD_RX_BURST)
 			return retval;
 	}
 
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 15/25] net/ixgbe: create common Rx queue structure
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (13 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 14/25] net/iavf: " Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 16/25] net/i40e: use the " Anatoly Burakov
                     ` (9 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Bruce Richardson, Vladimir Medvedkin

In preparation for the deduplication effort, generalize the Rx queue structure.

The entire Rx queue structure is moved to common/rx.h, with comments
clarified where necessary and the common parts separated from the
ixgbe-specific parts.
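
For illustration, a minimal sketch (not part of this patch; the helper name
is made up) of how driver code is expected to use the generalized structure
once the move is complete: the descriptor ring is reached through the
per-driver member of the ring-pointer union, while ixgbe-specific state such
as vlan_flags or pkt_type_mask sits in the per-driver union at the end of
the structure.

    /* Hypothetical helper, for illustration only: read a descriptor's
     * status word through the common Rx queue structure.
     */
    static inline uint32_t
    ixgbe_rx_desc_status(const struct ci_rx_queue *rxq, uint16_t idx)
    {
            volatile union ixgbe_adv_rx_desc *rxdp = &rxq->ixgbe_rx_ring[idx];

            return rte_le_to_cpu_32(rxdp->wb.upper.status_error);
    }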

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Separate out some of the changes from this commit into previous commits
    - Rename CI_RX_BURST to CI_RX_MAX_BURST to match the driver naming convention

 drivers/net/intel/common/rx.h                 |  67 ++++++++++-
 drivers/net/intel/ixgbe/ixgbe_ethdev.c        |   8 +-
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    |   6 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx.c          | 110 +++++++++---------
 drivers/net/intel/ixgbe/ixgbe_rxtx.h          |  65 +----------
 .../net/intel/ixgbe/ixgbe_rxtx_vec_common.h   |   4 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c |  18 +--
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c  |  18 +--
 8 files changed, 150 insertions(+), 146 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index abb01ba5e7..80a9f21303 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -10,14 +10,75 @@
 #include <rte_mbuf.h>
 #include <rte_ethdev.h>
 
-#define CI_RX_BURST 32
+#define CI_RX_MAX_BURST 32
+
+struct ci_rx_queue;
+
+struct ci_rx_entry {
+	struct rte_mbuf *mbuf; /* mbuf associated with RX descriptor. */
+};
+
+struct ci_rx_entry_sc {
+	struct rte_mbuf *fbuf; /* First segment of the fragmented packet.*/
+};
+
+/**
+ * Structure associated with each RX queue.
+ */
+struct ci_rx_queue {
+	struct rte_mempool  *mp; /**< mbuf pool to populate RX ring. */
+	union { /* RX ring virtual address */
+		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
+	};
+	volatile uint8_t *qrx_tail;   /**< register address of tail */
+	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
+	struct ci_rx_entry_sc *sw_sc_ring; /**< address of scattered Rx software ring. */
+	rte_iova_t rx_ring_phys_addr; /**< RX ring DMA address. */
+	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
+	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
+	/** hold packets to return to application */
+	struct rte_mbuf *rx_stage[CI_RX_MAX_BURST * 2];
+	uint16_t nb_rx_desc; /**< number of RX descriptors. */
+	uint16_t rx_tail;  /**< current value of tail register. */
+	uint16_t rx_nb_avail; /**< nr of staged pkts ready to ret to app */
+	uint16_t nb_rx_hold; /**< number of held free RX desc. */
+	uint16_t rx_next_avail; /**< idx of next staged pkt to ret to app */
+	uint16_t rx_free_thresh; /**< max free RX desc to hold. */
+	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
+	uint16_t rxrearm_nb;     /**< number of remaining to be re-armed */
+	uint16_t rxrearm_start;  /**< the idx we start the re-arming from */
+	uint16_t queue_id; /**< RX queue index. */
+	uint16_t port_id;  /**< Device port identifier. */
+	uint16_t reg_idx;  /**< RX queue register index. */
+	uint8_t crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
+	bool rx_deferred_start; /**< queue is not started on dev start. */
+	bool vector_rx; /**< indicates that vector RX is in use */
+	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
+	uint64_t mbuf_initializer; /**< value to init mbufs */
+	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
+	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
+	struct rte_mbuf fake_mbuf;
+	const struct rte_memzone *mz;
+	union {
+		struct { /* ixgbe specific values */
+			/** indicates that IPsec RX feature is in use */
+			uint8_t using_ipsec;
+			/** Packet type mask for different NICs. */
+			uint16_t pkt_type_mask;
+			/** UDP frames with a 0 checksum can be marked as checksum errors. */
+			uint8_t rx_udp_csum_zero_err;
+			/** flags to set in mbuf when a vlan is detected. */
+			uint64_t vlan_flags;
+		};
+	};
+};
 
 static inline uint16_t
 ci_rx_reassemble_packets(struct rte_mbuf **rx_bufs, uint16_t nb_bufs, uint8_t *split_flags,
 		struct rte_mbuf **pkt_first_seg, struct rte_mbuf **pkt_last_seg,
 		const uint8_t crc_len)
 {
-	struct rte_mbuf *pkts[CI_RX_BURST] = {0}; /*finished pkts*/
+	struct rte_mbuf *pkts[CI_RX_MAX_BURST] = {0}; /*finished pkts*/
 	struct rte_mbuf *start = *pkt_first_seg;
 	struct rte_mbuf *end = *pkt_last_seg;
 	unsigned int pkt_idx, buf_idx;
@@ -97,7 +158,7 @@ static inline bool
 ci_rxq_vec_capable(uint16_t nb_desc, uint16_t rx_free_thresh, uint64_t offloads)
 {
 	if (!rte_is_power_of_2(nb_desc) ||
-			rx_free_thresh < CI_RX_BURST ||
+			rx_free_thresh < CI_RX_MAX_BURST ||
 			(nb_desc % rx_free_thresh) != 0)
 		return false;
 
diff --git a/drivers/net/intel/ixgbe/ixgbe_ethdev.c b/drivers/net/intel/ixgbe/ixgbe_ethdev.c
index f1fd271a0a..df1eecc3c1 100644
--- a/drivers/net/intel/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/intel/ixgbe/ixgbe_ethdev.c
@@ -2022,7 +2022,7 @@ ixgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev, uint16_t queue, bool on)
 {
 	struct ixgbe_hwstrip *hwstrip =
 		IXGBE_DEV_PRIVATE_TO_HWSTRIP_BITMAP(dev->data->dev_private);
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	if (queue >= IXGBE_MAX_RX_QUEUE_NUM)
 		return;
@@ -2157,7 +2157,7 @@ ixgbe_vlan_hw_strip_config(struct rte_eth_dev *dev)
 	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
 	uint32_t ctrl;
 	uint16_t i;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	bool on;
 
 	PMD_INIT_FUNC_TRACE();
@@ -2200,7 +2200,7 @@ ixgbe_config_vlan_strip_on_all_queues(struct rte_eth_dev *dev, int mask)
 {
 	uint16_t i;
 	struct rte_eth_rxmode *rxmode;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	if (mask & RTE_ETH_VLAN_STRIP_MASK) {
 		rxmode = &dev->data->dev_conf.rxmode;
@@ -5789,7 +5789,7 @@ ixgbevf_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on)
 static int
 ixgbevf_vlan_offload_config(struct rte_eth_dev *dev, int mask)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t i;
 	int on = 0;
 
diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index 2ab7abbf4e..1df1787c7f 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -11,15 +11,15 @@
 void
 ixgbe_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
-	struct ixgbe_rx_entry *rxep;
+	struct ci_rx_queue *rxq = rx_queue;
+	struct ci_rx_entry *rxep;
 	volatile union ixgbe_adv_rx_desc *rxdp;
 	uint16_t rx_id;
 	uint64_t paddr;
 	uint64_t dma_addr;
 	uint16_t i;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
 	rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
 	for (i = 0; i < nb_mbufs; i++) {
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index 22d0aea1a7..00a14adfa7 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -1419,11 +1419,11 @@ int
 ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = &rxq->ixgbe_rx_ring[desc];
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.upper.status_error;
 
@@ -1563,10 +1563,10 @@ rx_desc_error_to_pkt_flags(uint32_t rx_status, uint16_t pkt_info,
 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
 #endif
 static inline int
-ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_scan_hw_ring(struct ci_rx_queue *rxq)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t pkt_len;
 	uint64_t pkt_flags;
@@ -1578,7 +1578,7 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
 	uint64_t vlan_flags = rxq->vlan_flags;
 
 	/* get references to current descriptor and S/W ring entry */
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = &rxq->ixgbe_rx_ring[rxq->rx_tail];
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	status = rxdp->wb.upper.status_error;
@@ -1663,10 +1663,10 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
 }
 
 static inline int
-ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
+ixgbe_rx_alloc_bufs(struct ci_rx_queue *rxq, bool reset_mbuf)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx;
 	__le64 dma_addr;
@@ -1680,7 +1680,7 @@ ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
 	if (unlikely(diag != 0))
 		return -ENOMEM;
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = &rxq->ixgbe_rx_ring[alloc_idx];
 	for (i = 0; i < rxq->rx_free_thresh; ++i) {
 		/* populate the static rte mbuf fields */
 		mb = rxep[i].mbuf;
@@ -1707,7 +1707,7 @@ ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
 }
 
 static inline uint16_t
-ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+ixgbe_rx_fill_from_stage(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			 uint16_t nb_pkts)
 {
 	struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
@@ -1731,7 +1731,7 @@ static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	     uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	uint16_t nb_rx = 0;
 
 	/* Any previously recv'd pkts will be returned from the Rx stage */
@@ -1820,11 +1820,11 @@ uint16_t
 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	volatile union ixgbe_adv_rx_desc *rx_ring;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *sw_ring;
-	struct ixgbe_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
 	union ixgbe_adv_rx_desc rxd;
@@ -1842,7 +1842,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
+	rx_ring = rxq->ixgbe_rx_ring;
 	sw_ring = rxq->sw_ring;
 	vlan_flags = rxq->vlan_flags;
 	while (nb_rx < nb_pkts) {
@@ -2047,7 +2047,7 @@ static inline void
 ixgbe_fill_cluster_head_buf(
 	struct rte_mbuf *head,
 	union ixgbe_adv_rx_desc *desc,
-	struct ixgbe_rx_queue *rxq,
+	struct ci_rx_queue *rxq,
 	uint32_t staterr)
 {
 	uint32_t pkt_info;
@@ -2109,10 +2109,10 @@ static inline uint16_t
 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 		    bool bulk_alloc)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
-	volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
-	struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
-	struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ixgbe_adv_rx_desc *rx_ring = rxq->ixgbe_rx_ring;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry_sc *sw_sc_ring = rxq->sw_sc_ring;
 	uint16_t rx_id = rxq->rx_tail;
 	uint16_t nb_rx = 0;
 	uint16_t nb_hold = rxq->nb_rx_hold;
@@ -2120,10 +2120,10 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 
 	while (nb_rx < nb_pkts) {
 		bool eop;
-		struct ixgbe_rx_entry *rxe;
-		struct ixgbe_scattered_rx_entry *sc_entry;
-		struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
-		struct ixgbe_rx_entry *next_rxe = NULL;
+		struct ci_rx_entry *rxe;
+		struct ci_rx_entry_sc *sc_entry;
+		struct ci_rx_entry_sc *next_sc_entry = NULL;
+		struct ci_rx_entry *next_rxe = NULL;
 		struct rte_mbuf *first_seg;
 		struct rte_mbuf *rxm;
 		struct rte_mbuf *nmb = NULL;
@@ -2962,7 +2962,7 @@ ixgbe_free_sc_cluster(struct rte_mbuf *m)
 }
 
 static void __rte_cold
-ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release_mbufs(struct ci_rx_queue *rxq)
 {
 	unsigned i;
 
@@ -2999,7 +2999,7 @@ ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
 }
 
 static void __rte_cold
-ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release(struct ci_rx_queue *rxq)
 {
 	if (rxq != NULL) {
 		ixgbe_rx_queue_release_mbufs(rxq);
@@ -3025,7 +3025,7 @@ ixgbe_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
  *           function must be used.
  */
 static inline int __rte_cold
-check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
+check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
 {
 	int ret = 0;
 
@@ -3062,7 +3062,7 @@ check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
 
 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
 static void __rte_cold
-ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
+ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ci_rx_queue *rxq)
 {
 	static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
 	unsigned i;
@@ -3083,7 +3083,7 @@ ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
 	 * reads extra memory as zeros.
 	 */
 	for (i = 0; i < len; i++) {
-		rxq->rx_ring[i] = zeroed_desc;
+		rxq->ixgbe_rx_ring[i] = zeroed_desc;
 	}
 
 	/*
@@ -3198,7 +3198,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 			 struct rte_mempool *mp)
 {
 	const struct rte_memzone *rz;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ixgbe_hw     *hw;
 	uint16_t len;
 	struct ixgbe_adapter *adapter = dev->data->dev_private;
@@ -3227,7 +3227,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	}
 
 	/* First allocate the rx queue data structure */
-	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
+	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE, socket_id);
 	if (rxq == NULL)
 		return -ENOMEM;
@@ -3297,7 +3297,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
 
 	rxq->rx_ring_phys_addr = rz->iova;
-	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
+	rxq->ixgbe_rx_ring = (union ixgbe_adv_rx_desc *)rz->addr;
 
 	/*
 	 * Certain constraints must be met in order to use the bulk buffer
@@ -3322,7 +3322,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 		len += IXGBE_RX_MAX_BURST;
 
 	rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
-					  sizeof(struct ixgbe_rx_entry) * len,
+					  sizeof(struct ci_rx_entry) * len,
 					  RTE_CACHE_LINE_SIZE, socket_id);
 	if (!rxq->sw_ring) {
 		ixgbe_rx_queue_release(rxq);
@@ -3339,7 +3339,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	 */
 	rxq->sw_sc_ring =
 		rte_zmalloc_socket("rxq->sw_sc_ring",
-				   sizeof(struct ixgbe_scattered_rx_entry) * len,
+				   sizeof(struct ci_rx_entry_sc) * len,
 				   RTE_CACHE_LINE_SIZE, socket_id);
 	if (!rxq->sw_sc_ring) {
 		ixgbe_rx_queue_release(rxq);
@@ -3348,7 +3348,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 
 	PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
 			    "dma_addr=0x%"PRIx64,
-		     rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
+		     rxq->sw_ring, rxq->sw_sc_ring, rxq->ixgbe_rx_ring,
 		     rxq->rx_ring_phys_addr);
 
 	if (!rte_is_power_of_2(nb_desc)) {
@@ -3372,11 +3372,11 @@ ixgbe_dev_rx_queue_count(void *rx_queue)
 {
 #define IXGBE_RXQ_SCAN_INTERVAL 4
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &(rxq->rx_ring[rxq->rx_tail]);
+	rxdp = &rxq->ixgbe_rx_ring[rxq->rx_tail];
 
 	while ((desc < rxq->nb_rx_desc) &&
 		(rxdp->wb.upper.status_error &
@@ -3384,7 +3384,7 @@ ixgbe_dev_rx_queue_count(void *rx_queue)
 		desc += IXGBE_RXQ_SCAN_INTERVAL;
 		rxdp += IXGBE_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
+			rxdp = &(rxq->ixgbe_rx_ring[rxq->rx_tail +
 				desc - rxq->nb_rx_desc]);
 	}
 
@@ -3394,7 +3394,7 @@ ixgbe_dev_rx_queue_count(void *rx_queue)
 int
 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile uint32_t *status;
 	uint32_t nb_hold, desc;
 
@@ -3414,7 +3414,7 @@ ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	status = &rxq->rx_ring[desc].wb.upper.status_error;
+	status = &rxq->ixgbe_rx_ring[desc].wb.upper.status_error;
 	if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
 		return RTE_ETH_RX_DESC_DONE;
 
@@ -3499,7 +3499,7 @@ ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
 	}
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 
 		if (rxq != NULL) {
 			ixgbe_rx_queue_release_mbufs(rxq);
@@ -4661,9 +4661,9 @@ ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
 }
 
 static int __rte_cold
-ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
+ixgbe_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq)
 {
-	struct ixgbe_rx_entry *rxe = rxq->sw_ring;
+	struct ci_rx_entry *rxe = rxq->sw_ring;
 	uint64_t dma_addr;
 	unsigned int i;
 
@@ -4683,7 +4683,7 @@ ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
 
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
-		rxd = &rxq->rx_ring[i];
+		rxd = &rxq->ixgbe_rx_ring[i];
 		rxd->read.hdr_addr = 0;
 		rxd->read.pkt_addr = dma_addr;
 		rxe[i].mbuf = mbuf;
@@ -5098,7 +5098,7 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 		dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 
 		rxq->vector_rx = rx_using_sse;
 #ifdef RTE_LIB_SECURITY
@@ -5176,7 +5176,7 @@ ixgbe_set_rsc(struct rte_eth_dev *dev)
 
 	/* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		uint32_t srrctl =
 			IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
 		uint32_t rscctl =
@@ -5252,7 +5252,7 @@ int __rte_cold
 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint64_t bus_addr;
 	uint32_t rxctrl;
 	uint32_t fctrl;
@@ -5548,7 +5548,7 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
 	struct ci_tx_queue *txq;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t txdctl;
 	uint32_t dmatxctl;
 	uint32_t rxctrl;
@@ -5635,7 +5635,7 @@ int __rte_cold
 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ixgbe_hw     *hw;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t rxdctl;
 	int poll_ms;
 
@@ -5678,7 +5678,7 @@ ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ixgbe_hw     *hw;
 	struct ixgbe_adapter *adapter = dev->data->dev_private;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t rxdctl;
 	int poll_ms;
 
@@ -5812,7 +5812,7 @@ void
 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -5850,7 +5850,7 @@ void
 ixgbe_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ixgbe_adapter *adapter = dev->data->dev_private;
 
 	rxq = dev->data->rx_queues[queue_id];
@@ -5876,7 +5876,7 @@ int __rte_cold
 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
 	uint32_t frame_size = dev->data->mtu + IXGBE_ETH_OVERHEAD;
 	uint64_t bus_addr;
@@ -6063,7 +6063,7 @@ ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
 	struct ci_tx_queue *txq;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t txdctl;
 	uint32_t rxdctl;
 	uint16_t i;
@@ -6261,7 +6261,7 @@ ixgbe_recycle_rx_descriptors_refill_vec(void __rte_unused * rx_queue,
 }
 
 int
-ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
+ixgbe_rxq_vec_setup(struct ci_rx_queue __rte_unused * rxq)
 {
 	return -1;
 }
@@ -6288,7 +6288,7 @@ ixgbe_txq_vec_setup(struct ci_tx_queue *txq __rte_unused)
 }
 
 void
-ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
+ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue __rte_unused * rxq)
 {
 	return;
 }
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index 5742e845cf..d1847a33dd 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -5,6 +5,7 @@
 #ifndef _IXGBE_RXTX_H_
 #define _IXGBE_RXTX_H_
 
+#include "../common/rx.h"
 #include "../common/tx.h"
 
 /*
@@ -30,7 +31,7 @@
 #define	IXGBE_MAX_RING_DESC	8192
 
 #define IXGBE_TX_MAX_BURST            32
-#define IXGBE_RX_MAX_BURST            32
+#define IXGBE_RX_MAX_BURST            CI_RX_MAX_BURST
 #define IXGBE_TX_MAX_FREE_BUF_SZ      64
 
 #define IXGBE_VPMD_DESCS_PER_LOOP     4
@@ -64,64 +65,6 @@
 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
 #define IXGBE_PACKET_TYPE_SHIFT             0X04
 
-/**
- * Structure associated with each descriptor of the RX ring of a RX queue.
- */
-struct ixgbe_rx_entry {
-	struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
-};
-
-struct ixgbe_scattered_rx_entry {
-	struct rte_mbuf *fbuf; /**< First segment of the fragmented packet. */
-};
-
-/**
- * Structure associated with each RX queue.
- */
-struct ixgbe_rx_queue {
-	struct rte_mempool  *mp; /**< mbuf pool to populate RX ring. */
-	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
-	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
-	volatile uint32_t   *qrx_tail; /**< RDT register address. */
-	struct ixgbe_rx_entry *sw_ring; /**< address of RX software ring. */
-	struct ixgbe_scattered_rx_entry *sw_sc_ring; /**< address of scattered Rx software ring. */
-	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
-	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
-	uint64_t            mbuf_initializer; /**< value to init mbufs */
-	uint16_t            nb_rx_desc; /**< number of RX descriptors. */
-	uint16_t            rx_tail;  /**< current value of RDT register. */
-	uint16_t            nb_rx_hold; /**< number of held free RX desc. */
-	uint16_t rx_nb_avail; /**< nr of staged pkts ready to ret to app */
-	uint16_t rx_next_avail; /**< idx of next staged pkt to ret to app */
-	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	uint8_t            vector_rx;
-	/**< indicates that vector RX is in use */
-#ifdef RTE_LIB_SECURITY
-	uint8_t            using_ipsec;
-	/**< indicates that IPsec RX feature is in use */
-#endif
-	uint16_t            rxrearm_nb;     /**< number of remaining to be re-armed */
-	uint16_t            rxrearm_start;  /**< the idx we start the re-arming from */
-	uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
-	uint16_t            queue_id; /**< RX queue index. */
-	uint16_t            reg_idx;  /**< RX queue register index. */
-	uint16_t            pkt_type_mask;  /**< Packet type mask for different NICs. */
-	uint16_t            port_id;  /**< Device port identifier. */
-	uint8_t             crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
-	uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
-	uint8_t             rx_deferred_start; /**< not in global dev start. */
-	/** UDP frames with a 0 checksum can be marked as checksum errors. */
-	uint8_t             rx_udp_csum_zero_err;
-	/** flags to set in mbuf when a vlan is detected. */
-	uint64_t            vlan_flags;
-	uint64_t	    offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
-	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
-	struct rte_mbuf fake_mbuf;
-	/** hold packets to return to application */
-	struct rte_mbuf *rx_stage[IXGBE_RX_MAX_BURST * 2];
-	const struct rte_memzone *mz;
-};
-
 /**
  * IXGBE CTX Constants
  */
@@ -226,8 +169,8 @@ uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 uint16_t ixgbe_recv_scattered_pkts_vec(void *rx_queue,
 		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
 int ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);
-int ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq);
-void ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq);
+int ixgbe_rxq_vec_setup(struct ci_rx_queue *rxq);
+void ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq);
 int ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt);
 
 extern const uint32_t ptype_table[IXGBE_PACKET_TYPE_MAX];
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
index 4442dc7b39..538a2b5164 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
@@ -69,7 +69,7 @@ ixgbe_tx_free_bufs(struct ci_tx_queue *txq)
 }
 
 static inline void
-_ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
+_ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	unsigned int i;
 
@@ -173,7 +173,7 @@ ixgbe_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
 		return -1;
 
 	for (uint16_t i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		if (!rxq)
 			continue;
 		if (!ci_rxq_vec_capable(rxq->nb_rx_desc, rxq->rx_free_thresh, rxq->offloads))
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
index 02d9dbb573..82c655e769 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -12,19 +12,19 @@
 #include "ixgbe_rxtx_vec_common.h"
 
 static inline void
-ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	uint64x2_t dma_addr0, dma_addr1;
 	uint64x2_t zero = vdupq_n_u64(0);
 	uint64_t paddr;
 	uint8x8_t p;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (unlikely(rte_mempool_get_bulk(rxq->mp,
@@ -282,11 +282,11 @@ desc_to_ptype_v(uint64x2_t descs[4], uint16_t pkt_type_mask,
  * - floor align nb_pkts to a IXGBE_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint8x16_t shuf_msk = {
@@ -309,7 +309,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rx_tail;
 
 	rte_prefetch_non_temporal(rxdp);
 
@@ -488,7 +488,7 @@ static uint16_t
 ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IXGBE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -634,7 +634,7 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_ixgbe_rx_queue_release_mbufs_vec(rxq);
 }
@@ -657,7 +657,7 @@ static const struct ixgbe_txq_ops vec_txq_ops = {
 };
 
 int __rte_cold
-ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
index ea57631932..f6aa3f9f9a 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -13,12 +13,12 @@
 #include <rte_vect.h>
 
 static inline void
-ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
@@ -26,7 +26,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 
 	const __m128i hba_msk = _mm_set_epi64x(0, UINT64_MAX);
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -327,11 +327,11 @@ desc_to_ptype_v(__m128i descs[4], uint16_t pkt_type_mask,
  * - floor align nb_pkts to a IXGBE_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 #ifdef RTE_LIB_SECURITY
 	uint8_t use_ipsec = rxq->using_ipsec;
@@ -377,7 +377,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rx_tail;
 
 	rte_prefetch0(rxdp);
 
@@ -609,7 +609,7 @@ static uint16_t
 ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IXGBE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -755,7 +755,7 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_ixgbe_rx_queue_release_mbufs_vec(rxq);
 }
@@ -778,7 +778,7 @@ static const struct ixgbe_txq_ops vec_txq_ops = {
 };
 
 int __rte_cold
-ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 16/25] net/i40e: use the common Rx queue structure
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (14 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 15/25] net/ixgbe: create common Rx queue structure Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 17/25] net/ice: " Anatoly Burakov
                     ` (8 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes

Make the i40e driver use the new common Rx queue structure.

The i40e driver supports 16-byte and 32-byte Rx descriptor formats, which
are shared by other drivers. To have fewer driver-specific definitions in
the common structures, add a header file defining the shared descriptor
formats, and switch between the 16-byte and 32-byte formats by way of the
existing RTE_NET_INTEL_USE_16BYTE_DESC define.
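
For illustration, a minimal sketch (the helper below is hypothetical and not
part of this patch): both descriptor layouts keep the status/error/length
qword in the same position, so most consumers can stay format-agnostic, and
only code touching the extended qword2/qword3 fields needs to be guarded by
the define.

    /* Hypothetical helper, for illustration only: valid for both the
     * 16-byte and 32-byte ci_rx_desc layouts, since qword1 is common
     * to both.
     */
    static inline uint64_t
    ci_rx_desc_qword1(const volatile union ci_rx_desc *rxdp)
    {
            return rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
    }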

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Separate some of the changes from this commit
    - Introduce common descriptor format

 drivers/net/intel/common/desc.h               |  89 ++++++++++++
 drivers/net/intel/common/rx.h                 |  15 ++
 drivers/net/intel/i40e/i40e_ethdev.c          |   4 +-
 drivers/net/intel/i40e/i40e_ethdev.h          |   4 +-
 drivers/net/intel/i40e/i40e_fdir.c            |  16 +--
 .../i40e/i40e_recycle_mbufs_vec_common.c      |   6 +-
 drivers/net/intel/i40e/i40e_rxtx.c            | 134 +++++++++---------
 drivers/net/intel/i40e/i40e_rxtx.h            |  74 ++--------
 drivers/net/intel/i40e/i40e_rxtx_common_avx.h |   6 +-
 .../net/intel/i40e/i40e_rxtx_vec_altivec.c    |  20 +--
 drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |  14 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |  14 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_common.h |   4 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   |  24 ++--
 drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    |  24 ++--
 15 files changed, 248 insertions(+), 200 deletions(-)
 create mode 100644 drivers/net/intel/common/desc.h

diff --git a/drivers/net/intel/common/desc.h b/drivers/net/intel/common/desc.h
new file mode 100644
index 0000000000..f9e7f27991
--- /dev/null
+++ b/drivers/net/intel/common/desc.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2025 Intel Corporation
+ */
+
+ #ifndef _COMMON_INTEL_DESC_H_
+ #define _COMMON_INTEL_DESC_H_
+
+#include <rte_byteorder.h>
+
+/* HW desc structures, both 16-byte and 32-byte types are supported */
+#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
+union ci_rx_desc {
+	struct {
+		rte_le64_t pkt_addr; /* Packet buffer address */
+		rte_le64_t hdr_addr; /* Header buffer address */
+	} read;
+	struct {
+		struct {
+			struct {
+				union {
+					rte_le16_t mirroring_status;
+					rte_le16_t fcoe_ctx_id;
+				} mirr_fcoe;
+				rte_le16_t l2tag1;
+			} lo_dword;
+			union {
+				rte_le32_t rss; /* RSS Hash */
+				rte_le32_t fd_id; /* Flow director filter id */
+				rte_le32_t fcoe_param; /* FCoE DDP Context id */
+			} hi_dword;
+		} qword0;
+		struct {
+			/* ext status/error/pktype/length */
+			rte_le64_t status_error_len;
+		} qword1;
+	} wb;  /* writeback */
+};
+#else
+union ci_rx_desc {
+	struct {
+		rte_le64_t  pkt_addr; /* Packet buffer address */
+		rte_le64_t  hdr_addr; /* Header buffer address */
+			/* bit 0 of hdr_buffer_addr is DD bit */
+		rte_le64_t  rsvd1;
+		rte_le64_t  rsvd2;
+	} read;
+	struct {
+		struct {
+			struct {
+				union {
+					rte_le16_t mirroring_status;
+					rte_le16_t fcoe_ctx_id;
+				} mirr_fcoe;
+				rte_le16_t l2tag1;
+			} lo_dword;
+			union {
+				rte_le32_t rss; /* RSS Hash */
+				rte_le32_t fcoe_param; /* FCoE DDP Context id */
+				/* Flow director filter id in case of
+				 * Programming status desc WB
+				 */
+				rte_le32_t fd_id;
+			} hi_dword;
+		} qword0;
+		struct {
+			/* status/error/pktype/length */
+			rte_le64_t status_error_len;
+		} qword1;
+		struct {
+			rte_le16_t ext_status; /* extended status */
+			rte_le16_t rsvd;
+			rte_le16_t l2tag2_1;
+			rte_le16_t l2tag2_2;
+		} qword2;
+		struct {
+			union {
+				rte_le32_t flex_bytes_lo;
+				rte_le32_t pe_status;
+			} lo_dword;
+			union {
+				rte_le32_t flex_bytes_hi;
+				rte_le32_t fd_id;
+			} hi_dword;
+		} qword3;
+	} wb;  /* writeback */
+};
+#endif
+
+#endif /* _COMMON_INTEL_DESC_H_ */
diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index 80a9f21303..8da52fd78e 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -10,6 +10,8 @@
 #include <rte_mbuf.h>
 #include <rte_ethdev.h>
 
+#include "desc.h"
+
 #define CI_RX_MAX_BURST 32
 
 struct ci_rx_queue;
@@ -29,6 +31,7 @@ struct ci_rx_queue {
 	struct rte_mempool  *mp; /**< mbuf pool to populate RX ring. */
 	union { /* RX ring virtual address */
 		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
+		volatile union ci_rx_desc *rx_ring;
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
@@ -50,14 +53,22 @@ struct ci_rx_queue {
 	uint16_t queue_id; /**< RX queue index. */
 	uint16_t port_id;  /**< Device port identifier. */
 	uint16_t reg_idx;  /**< RX queue register index. */
+	uint16_t rx_buf_len; /* The packet buffer size */
+	uint16_t rx_hdr_len; /* The header buffer size */
+	uint16_t max_pkt_len; /* Maximum packet length */
 	uint8_t crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
+	bool q_set; /**< indicate if rx queue has been configured */
 	bool rx_deferred_start; /**< queue is not started on dev start. */
+	bool fdir_enabled; /* 0 if FDIR disabled, 1 when enabled */
 	bool vector_rx; /**< indicates that vector RX is in use */
 	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
 	uint64_t mbuf_initializer; /**< value to init mbufs */
 	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
 	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
 	struct rte_mbuf fake_mbuf;
+	union { /* the VSI this queue belongs to */
+		struct i40e_vsi *i40e_vsi;
+	};
 	const struct rte_memzone *mz;
 	union {
 		struct { /* ixgbe specific values */
@@ -70,6 +81,10 @@ struct ci_rx_queue {
 			/** flags to set in mbuf when a vlan is detected. */
 			uint64_t vlan_flags;
 		};
+		struct { /* i40e specific values */
+			uint8_t hs_mode; /**< Header Split mode */
+			uint8_t dcb_tc; /**< Traffic class of rx queue */
+		};
 	};
 };
 
diff --git a/drivers/net/intel/i40e/i40e_ethdev.c b/drivers/net/intel/i40e/i40e_ethdev.c
index 90eba3419f..e0a865845b 100644
--- a/drivers/net/intel/i40e/i40e_ethdev.c
+++ b/drivers/net/intel/i40e/i40e_ethdev.c
@@ -6609,7 +6609,7 @@ i40e_dev_rx_init(struct i40e_pf *pf)
 	struct rte_eth_dev_data *data = pf->dev_data;
 	int ret = I40E_SUCCESS;
 	uint16_t i;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	i40e_pf_config_rss(pf);
 	for (i = 0; i < data->nb_rx_queues; i++) {
@@ -8974,7 +8974,7 @@ i40e_pf_calc_configured_queues_num(struct i40e_pf *pf)
 {
 	struct rte_eth_dev_data *data = pf->dev_data;
 	int i, num;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	num = 0;
 	for (i = 0; i < pf->lan_nb_qps; i++) {
diff --git a/drivers/net/intel/i40e/i40e_ethdev.h b/drivers/net/intel/i40e/i40e_ethdev.h
index ccc8732d7d..44864292d0 100644
--- a/drivers/net/intel/i40e/i40e_ethdev.h
+++ b/drivers/net/intel/i40e/i40e_ethdev.h
@@ -333,7 +333,7 @@ struct i40e_vsi_list {
 	struct i40e_vsi *vsi;
 };
 
-struct i40e_rx_queue;
+struct ci_rx_queue;
 struct ci_tx_queue;
 
 /* Bandwidth limit information */
@@ -739,7 +739,7 @@ struct i40e_fdir_info {
 	struct i40e_vsi *fdir_vsi;     /* pointer to fdir VSI structure */
 	uint16_t match_counter_index;  /* Statistic counter index used for fdir*/
 	struct ci_tx_queue *txq;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	void *prg_pkt[I40E_FDIR_PRG_PKT_CNT];     /* memory for fdir program packet */
 	uint64_t dma_addr[I40E_FDIR_PRG_PKT_CNT]; /* physic address of packet memory*/
 	/*
diff --git a/drivers/net/intel/i40e/i40e_fdir.c b/drivers/net/intel/i40e/i40e_fdir.c
index 734218b67d..a891819f47 100644
--- a/drivers/net/intel/i40e/i40e_fdir.c
+++ b/drivers/net/intel/i40e/i40e_fdir.c
@@ -100,9 +100,9 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 				  bool add, bool wait_status);
 
 static int
-i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
+i40e_fdir_rx_queue_init(struct ci_rx_queue *rxq)
 {
-	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
+	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->i40e_vsi);
 	struct i40e_hmc_obj_rxq rx_ctx;
 	int err = I40E_SUCCESS;
 
@@ -139,7 +139,7 @@ i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
 		return err;
 	}
 	rxq->qrx_tail = hw->hw_addr +
-		I40E_QRX_TAIL(rxq->vsi->base_queue);
+		I40E_QRX_TAIL(rxq->i40e_vsi->base_queue);
 
 	rte_wmb();
 	/* Init the RX tail register. */
@@ -382,7 +382,7 @@ i40e_fdir_rx_proc_enable(struct rte_eth_dev *dev, bool on)
 	int32_t i;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		if (!rxq)
 			continue;
 		rxq->fdir_enabled = on;
@@ -929,9 +929,9 @@ i40e_build_ctob(uint32_t td_cmd,
  * tx queue
  */
 static inline int
-i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
+i40e_check_fdir_programming_status(struct ci_rx_queue *rxq)
 {
-	volatile union i40e_rx_desc *rxdp;
+	volatile union ci_rx_desc *rxdp;
 	uint64_t qword1;
 	uint32_t rx_status;
 	uint32_t len, id;
@@ -987,7 +987,7 @@ i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
 }
 
 static inline void
-i40e_fdir_programming_status_cleanup(struct i40e_rx_queue *rxq)
+i40e_fdir_programming_status_cleanup(struct ci_rx_queue *rxq)
 {
 	uint16_t retry_count = 0;
 
@@ -1627,7 +1627,7 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 				  bool add, bool wait_status)
 {
 	struct ci_tx_queue *txq = pf->fdir.txq;
-	struct i40e_rx_queue *rxq = pf->fdir.rxq;
+	struct ci_rx_queue *rxq = pf->fdir.rxq;
 	const struct i40e_fdir_action *fdir_action = &filter->action;
 	volatile struct i40e_tx_desc *txdp;
 	volatile struct i40e_filter_program_desc *fdirdp;
diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
index 2875c578af..20d9fd7b22 100644
--- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
@@ -13,9 +13,9 @@
 void
 i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
-	struct i40e_rx_entry *rxep;
-	volatile union i40e_rx_desc *rxdp;
+	struct ci_rx_queue *rxq = rx_queue;
+	struct ci_rx_entry *rxep;
+	volatile union ci_rx_desc *rxdp;
 	uint16_t rx_id;
 	uint64_t paddr;
 	uint64_t dma_addr;
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index 2e61076378..0b06130fe5 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -94,8 +94,8 @@ i40e_monitor_callback(const uint64_t value,
 int
 i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
-	volatile union i40e_rx_desc *rxdp;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ci_rx_desc *rxdp;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
@@ -113,7 +113,7 @@ i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 }
 
 static inline void
-i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
+i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union ci_rx_desc *rxdp)
 {
 	if (rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
 		(1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
@@ -214,7 +214,7 @@ i40e_get_iee15888_flags(struct rte_mbuf *mb, uint64_t qword)
 #endif
 
 static inline uint64_t
-i40e_rxd_build_fdir(volatile union i40e_rx_desc *rxdp, struct rte_mbuf *mb)
+i40e_rxd_build_fdir(volatile union ci_rx_desc *rxdp, struct rte_mbuf *mb)
 {
 	uint64_t flags = 0;
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
@@ -416,9 +416,9 @@ i40e_xmit_cleanup(struct ci_tx_queue *txq)
 
 static inline int
 #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
-check_rx_burst_bulk_alloc_preconditions(struct i40e_rx_queue *rxq)
+check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
 #else
-check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq)
+check_rx_burst_bulk_alloc_preconditions(__rte_unused struct ci_rx_queue *rxq)
 #endif
 {
 	int ret = 0;
@@ -456,10 +456,10 @@ check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq)
 #error "PMD I40E: I40E_LOOK_AHEAD must be 8\n"
 #endif
 static inline int
-i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
+i40e_rx_scan_hw_ring(struct ci_rx_queue *rxq)
 {
-	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep;
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t pkt_len;
 	uint64_t qword1;
@@ -467,7 +467,7 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
 	int32_t s[I40E_LOOK_AHEAD], var, nb_dd;
 	int32_t i, j, nb_rx = 0;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	rxdp = &rxq->rx_ring[rxq->rx_tail];
 	rxep = &rxq->sw_ring[rxq->rx_tail];
@@ -558,7 +558,7 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
 }
 
 static inline uint16_t
-i40e_rx_fill_from_stage(struct i40e_rx_queue *rxq,
+i40e_rx_fill_from_stage(struct ci_rx_queue *rxq,
 			struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
@@ -577,10 +577,10 @@ i40e_rx_fill_from_stage(struct i40e_rx_queue *rxq,
 }
 
 static inline int
-i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
+i40e_rx_alloc_bufs(struct ci_rx_queue *rxq)
 {
-	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep;
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx, i;
 	uint64_t dma_addr;
@@ -629,7 +629,7 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
 static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = (struct i40e_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	struct rte_eth_dev *dev;
 	uint16_t nb_rx = 0;
 
@@ -648,7 +648,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		if (i40e_rx_alloc_bufs(rxq) != 0) {
 			uint16_t i, j;
 
-			dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+			dev = I40E_VSI_TO_ETH_DEV(rxq->i40e_vsi);
 			dev->data->rx_mbuf_alloc_failed +=
 				rxq->rx_free_thresh;
 
@@ -707,12 +707,12 @@ i40e_recv_pkts_bulk_alloc(void __rte_unused *rx_queue,
 uint16_t
 i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq;
-	volatile union i40e_rx_desc *rx_ring;
-	volatile union i40e_rx_desc *rxdp;
-	union i40e_rx_desc rxd;
-	struct i40e_rx_entry *sw_ring;
-	struct i40e_rx_entry *rxe;
+	struct ci_rx_queue *rxq;
+	volatile union ci_rx_desc *rx_ring;
+	volatile union ci_rx_desc *rxdp;
+	union ci_rx_desc rxd;
+	struct ci_rx_entry *sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_eth_dev *dev;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
@@ -731,7 +731,7 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	rx_id = rxq->rx_tail;
 	rx_ring = rxq->rx_ring;
 	sw_ring = rxq->sw_ring;
-	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -745,7 +745,7 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+			dev = I40E_VSI_TO_ETH_DEV(rxq->i40e_vsi);
 			dev->data->rx_mbuf_alloc_failed++;
 			break;
 		}
@@ -837,12 +837,12 @@ i40e_recv_scattered_pkts(void *rx_queue,
 			 struct rte_mbuf **rx_pkts,
 			 uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
-	volatile union i40e_rx_desc *rx_ring = rxq->rx_ring;
-	volatile union i40e_rx_desc *rxdp;
-	union i40e_rx_desc rxd;
-	struct i40e_rx_entry *sw_ring = rxq->sw_ring;
-	struct i40e_rx_entry *rxe;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ci_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union ci_rx_desc *rxdp;
+	union ci_rx_desc rxd;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb, *rxm;
@@ -853,7 +853,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
 	uint64_t qword1;
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -867,7 +867,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
 
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+			dev = I40E_VSI_TO_ETH_DEV(rxq->i40e_vsi);
 			dev->data->rx_mbuf_alloc_failed++;
 			break;
 		}
@@ -1798,7 +1798,7 @@ i40e_get_queue_offset_by_qindex(struct i40e_pf *pf, uint16_t queue_idx)
 int
 i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -1841,7 +1841,7 @@ i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 int
 i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -2004,7 +2004,7 @@ i40e_dev_first_queue(uint16_t idx, void **queues, int num)
 
 static int
 i40e_dev_rx_queue_setup_runtime(struct rte_eth_dev *dev,
-				struct i40e_rx_queue *rxq)
+				struct ci_rx_queue *rxq)
 {
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
@@ -2081,7 +2081,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct i40e_vsi *vsi;
 	struct i40e_pf *pf = NULL;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz;
 	uint32_t ring_size;
 	uint16_t len, i;
@@ -2116,7 +2116,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate the rx queue data structure */
 	rxq = rte_zmalloc_socket("i40e rx queue",
-				 sizeof(struct i40e_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 socket_id);
 	if (!rxq) {
@@ -2135,7 +2135,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	else
 		rxq->crc_len = 0;
 	rxq->drop_en = rx_conf->rx_drop_en;
-	rxq->vsi = vsi;
+	rxq->i40e_vsi = vsi;
 	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 	rxq->offloads = offloads;
 
@@ -2148,7 +2148,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	 */
 	len += I40E_RX_MAX_BURST;
 
-	ring_size = RTE_ALIGN(len * sizeof(union i40e_rx_desc),
+	ring_size = RTE_ALIGN(len * sizeof(union ci_rx_desc),
 			      I40E_DMA_MEM_ALIGN);
 
 	rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
@@ -2164,14 +2164,14 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	memset(rz->addr, 0, ring_size);
 
 	rxq->rx_ring_phys_addr = rz->iova;
-	rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
+	rxq->rx_ring = (union ci_rx_desc *)rz->addr;
 
 	len = (uint16_t)(nb_desc + I40E_RX_MAX_BURST);
 
 	/* Allocate the software ring. */
 	rxq->sw_ring =
 		rte_zmalloc_socket("i40e rx sw ring",
-				   sizeof(struct i40e_rx_entry) * len,
+				   sizeof(struct ci_rx_entry) * len,
 				   RTE_CACHE_LINE_SIZE,
 				   socket_id);
 	if (!rxq->sw_ring) {
@@ -2242,7 +2242,7 @@ i40e_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 void
 i40e_rx_queue_release(void *rxq)
 {
-	struct i40e_rx_queue *q = (struct i40e_rx_queue *)rxq;
+	struct ci_rx_queue *q = (struct ci_rx_queue *)rxq;
 
 	if (!q) {
 		PMD_DRV_LOG(DEBUG, "Pointer to rxq is NULL");
@@ -2259,8 +2259,8 @@ uint32_t
 i40e_dev_rx_queue_count(void *rx_queue)
 {
 #define I40E_RXQ_SCAN_INTERVAL 4
-	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_queue *rxq;
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_queue *rxq;
 	uint16_t desc = 0;
 
 	rxq = rx_queue;
@@ -2287,7 +2287,7 @@ i40e_dev_rx_queue_count(void *rx_queue)
 int
 i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile uint64_t *status;
 	uint64_t mask;
 	uint32_t desc;
@@ -2628,7 +2628,7 @@ i40e_memzone_reserve(const char *name, uint32_t len, int socket_id)
 }
 
 void
-i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs(struct ci_rx_queue *rxq)
 {
 	uint16_t i;
 
@@ -2663,7 +2663,7 @@ i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
 }
 
 void
-i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
+i40e_reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	unsigned i;
 	uint16_t len;
@@ -2680,7 +2680,7 @@ i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
 #endif /* RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC */
 		len = rxq->nb_rx_desc;
 
-	for (i = 0; i < len * sizeof(union i40e_rx_desc); i++)
+	for (i = 0; i < len * sizeof(union ci_rx_desc); i++)
 		((volatile char *)rxq->rx_ring)[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
@@ -2898,14 +2898,14 @@ i40e_tx_queue_init(struct ci_tx_queue *txq)
 }
 
 int
-i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
+i40e_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq)
 {
-	struct i40e_rx_entry *rxe = rxq->sw_ring;
+	struct ci_rx_entry *rxe = rxq->sw_ring;
 	uint64_t dma_addr;
 	uint16_t i;
 
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
-		volatile union i40e_rx_desc *rxd;
+		volatile union ci_rx_desc *rxd;
 		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mp);
 
 		if (unlikely(!mbuf)) {
@@ -2941,10 +2941,10 @@ i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
  * and maximum packet length.
  */
 static int
-i40e_rx_queue_config(struct i40e_rx_queue *rxq)
+i40e_rx_queue_config(struct ci_rx_queue *rxq)
 {
-	struct i40e_pf *pf = I40E_VSI_TO_PF(rxq->vsi);
-	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
+	struct i40e_pf *pf = I40E_VSI_TO_PF(rxq->i40e_vsi);
+	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->i40e_vsi);
 	struct rte_eth_dev_data *data = pf->dev_data;
 	uint16_t buf_size;
 
@@ -2988,11 +2988,11 @@ i40e_rx_queue_config(struct i40e_rx_queue *rxq)
 
 /* Init the RX queue in hardware */
 int
-i40e_rx_queue_init(struct i40e_rx_queue *rxq)
+i40e_rx_queue_init(struct ci_rx_queue *rxq)
 {
 	int err = I40E_SUCCESS;
-	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
-	struct rte_eth_dev_data *dev_data = I40E_VSI_TO_DEV_DATA(rxq->vsi);
+	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->i40e_vsi);
+	struct rte_eth_dev_data *dev_data = I40E_VSI_TO_DEV_DATA(rxq->i40e_vsi);
 	uint16_t pf_q = rxq->reg_idx;
 	uint16_t buf_size;
 	struct i40e_hmc_obj_rxq rx_ctx;
@@ -3166,7 +3166,7 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
 enum i40e_status_code
 i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz = NULL;
 	uint32_t ring_size;
 	struct rte_eth_dev *dev;
@@ -3180,7 +3180,7 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 
 	/* Allocate the RX queue data structure. */
 	rxq = rte_zmalloc_socket("i40e fdir rx queue",
-				  sizeof(struct i40e_rx_queue),
+				  sizeof(struct ci_rx_queue),
 				  RTE_CACHE_LINE_SIZE,
 				  SOCKET_ID_ANY);
 	if (!rxq) {
@@ -3190,7 +3190,7 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 	}
 
 	/* Allocate RX hardware ring descriptors. */
-	ring_size = sizeof(union i40e_rx_desc) * I40E_FDIR_NUM_RX_DESC;
+	ring_size = sizeof(union ci_rx_desc) * I40E_FDIR_NUM_RX_DESC;
 	ring_size = RTE_ALIGN(ring_size, I40E_DMA_MEM_ALIGN);
 
 	rz = rte_eth_dma_zone_reserve(dev, "fdir_rx_ring",
@@ -3206,11 +3206,11 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 	rxq->nb_rx_desc = I40E_FDIR_NUM_RX_DESC;
 	rxq->queue_id = I40E_FDIR_QUEUE_ID;
 	rxq->reg_idx = pf->fdir.fdir_vsi->base_queue;
-	rxq->vsi = pf->fdir.fdir_vsi;
+	rxq->i40e_vsi = pf->fdir.fdir_vsi;
 
 	rxq->rx_ring_phys_addr = rz->iova;
-	memset(rz->addr, 0, I40E_FDIR_NUM_RX_DESC * sizeof(union i40e_rx_desc));
-	rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
+	memset(rz->addr, 0, I40E_FDIR_NUM_RX_DESC * sizeof(union ci_rx_desc));
+	rxq->rx_ring = (union ci_rx_desc *)rz->addr;
 
 	/*
 	 * Don't need to allocate software ring and reset for the fdir
@@ -3226,7 +3226,7 @@ void
 i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -3264,7 +3264,7 @@ void
 i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 
@@ -3335,7 +3335,7 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 		}
 		if (ad->rx_vec_allowed) {
 			for (i = 0; i < dev->data->nb_rx_queues; i++) {
-				struct i40e_rx_queue *rxq =
+				struct ci_rx_queue *rxq =
 					dev->data->rx_queues[i];
 
 				if (rxq && i40e_rxq_vec_setup(rxq)) {
@@ -3438,7 +3438,7 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 			 dev->rx_pkt_burst == i40e_recv_pkts_vec_avx2);
 
 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
-			struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+			struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 
 			if (rxq)
 				rxq->vector_rx = vector_rx;
diff --git a/drivers/net/intel/i40e/i40e_rxtx.h b/drivers/net/intel/i40e/i40e_rxtx.h
index 3dca32b1ba..05c41d473e 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx.h
@@ -6,8 +6,9 @@
 #define _I40E_RXTX_H_
 
 #include "../common/tx.h"
+#include "../common/rx.h"
 
-#define I40E_RX_MAX_BURST 32
+#define I40E_RX_MAX_BURST CI_RX_MAX_BURST
 #define I40E_TX_MAX_BURST 32
 
 #define I40E_VPMD_RX_BURST            32
@@ -66,63 +67,6 @@ enum i40e_header_split_mode {
 			       I40E_HEADER_SPLIT_UDP_TCP | \
 			       I40E_HEADER_SPLIT_SCTP)
 
-/* HW desc structure, both 16-byte and 32-byte types are supported */
-#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
-#define i40e_rx_desc i40e_16byte_rx_desc
-#else
-#define i40e_rx_desc i40e_32byte_rx_desc
-#endif
-
-struct i40e_rx_entry {
-	struct rte_mbuf *mbuf;
-};
-
-/*
- * Structure associated with each RX queue.
- */
-struct i40e_rx_queue {
-	struct rte_mempool *mp; /**< mbuf pool to populate RX ring */
-	volatile union i40e_rx_desc *rx_ring;/**< RX ring virtual address */
-	uint64_t rx_ring_phys_addr; /**< RX ring DMA address */
-	struct i40e_rx_entry *sw_ring; /**< address of RX soft ring */
-	uint16_t nb_rx_desc; /**< number of RX descriptors */
-	uint16_t rx_free_thresh; /**< max free RX desc to hold */
-	uint16_t rx_tail; /**< current value of tail */
-	uint16_t nb_rx_hold; /**< number of held free RX desc */
-	struct rte_mbuf *pkt_first_seg; /**< first segment of current packet */
-	struct rte_mbuf *pkt_last_seg; /**< last segment of current packet */
-	struct rte_mbuf fake_mbuf; /**< dummy mbuf */
-#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
-	uint16_t rx_nb_avail; /**< number of staged packets ready */
-	uint16_t rx_next_avail; /**< index of next staged packets */
-	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	struct rte_mbuf *rx_stage[I40E_RX_MAX_BURST * 2];
-#endif
-
-	uint16_t rxrearm_nb;	/**< number of remaining to be re-armed */
-	uint16_t rxrearm_start;	/**< the idx we start the re-arming from */
-	uint64_t mbuf_initializer; /**< value to init mbufs */
-
-	uint16_t port_id; /**< device port ID */
-	uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise */
-	uint8_t fdir_enabled; /**< 0 if FDIR disabled, 1 when enabled */
-	uint16_t queue_id; /**< RX queue index */
-	uint16_t reg_idx; /**< RX queue register index */
-	uint8_t drop_en; /**< if not 0, set register bit */
-	volatile uint8_t *qrx_tail; /**< register address of tail */
-	struct i40e_vsi *vsi; /**< the VSI this queue belongs to */
-	uint16_t rx_buf_len; /* The packet buffer size */
-	uint16_t rx_hdr_len; /* The header buffer size */
-	uint16_t max_pkt_len; /* Maximum packet length */
-	uint8_t hs_mode; /* Header Split mode */
-	bool q_set; /**< indicate if rx queue has been configured */
-	bool rx_deferred_start; /**< don't start this queue in dev start */
-	uint16_t vector_rx; /**<flag indicate the usage of vPMD for rx */
-	uint8_t dcb_tc;         /**< Traffic class of rx queue */
-	uint64_t offloads; /**< Rx offload flags of RTE_ETH_RX_OFFLOAD_* */
-	const struct rte_memzone *mz;
-};
-
 /** Offload features */
 union i40e_tx_offload {
 	uint64_t data;
@@ -171,16 +115,16 @@ uint16_t i40e_simple_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t i40e_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
 int i40e_tx_queue_init(struct ci_tx_queue *txq);
-int i40e_rx_queue_init(struct i40e_rx_queue *rxq);
+int i40e_rx_queue_init(struct ci_rx_queue *rxq);
 void i40e_free_tx_resources(struct ci_tx_queue *txq);
-void i40e_free_rx_resources(struct i40e_rx_queue *rxq);
+void i40e_free_rx_resources(struct ci_rx_queue *rxq);
 void i40e_dev_clear_queues(struct rte_eth_dev *dev);
 void i40e_dev_free_queues(struct rte_eth_dev *dev);
-void i40e_reset_rx_queue(struct i40e_rx_queue *rxq);
+void i40e_reset_rx_queue(struct ci_rx_queue *rxq);
 void i40e_reset_tx_queue(struct ci_tx_queue *txq);
 int i40e_tx_done_cleanup(void *txq, uint32_t free_cnt);
-int i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq);
-void i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq);
+int i40e_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq);
+void i40e_rx_queue_release_mbufs(struct ci_rx_queue *rxq);
 
 uint32_t i40e_dev_rx_queue_count(void *rx_queue);
 int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset);
@@ -196,9 +140,9 @@ uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue,
 				      struct rte_mbuf **rx_pkts,
 				      uint16_t nb_pkts);
 int i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);
-int i40e_rxq_vec_setup(struct i40e_rx_queue *rxq);
+int i40e_rxq_vec_setup(struct ci_rx_queue *rxq);
 int i40e_txq_vec_setup(struct ci_tx_queue *txq);
-void i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq);
+void i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq);
 uint16_t i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				   uint16_t nb_pkts);
 void i40e_set_rx_function(struct rte_eth_dev *dev);
diff --git a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
index 8fc7cd5bd4..97cf5226f6 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
@@ -13,12 +13,12 @@
 
 #ifdef __AVX2__
 static __rte_always_inline void
-i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
+i40e_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
 {
 	int i;
 	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
index 568891cfb2..a914ef20f4 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
@@ -16,13 +16,13 @@
 #include <rte_altivec.h>
 
 static inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
+	volatile union ci_rx_desc *rxdp;
 
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 
 	__vector unsigned long hdr_room = (__vector unsigned long){
@@ -195,16 +195,16 @@ desc_to_ptype_v(__vector unsigned long descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a I40E_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
-	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *sw_ring;
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
 	__vector unsigned char shuf_msk;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	__vector unsigned short crc_adjust = (__vector unsigned short){
 		0, 0,         /* ignore pkt_type field */
@@ -465,7 +465,7 @@ static uint16_t
 i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -611,13 +611,13 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+i40e_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index a13dd9bc78..fee2a6e670 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -16,7 +16,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	i40e_rxq_rearm_common(rxq, false);
 }
@@ -29,7 +29,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
  * desc_idx: required to select the correct shift at compile time
  */
 static inline __m256i
-desc_fdir_processing_32b(volatile union i40e_rx_desc *rxdp,
+desc_fdir_processing_32b(volatile union ci_rx_desc *rxdp,
 			 struct rte_mbuf **rx_pkts,
 			 const uint32_t pkt_idx,
 			 const uint32_t desc_idx)
@@ -105,14 +105,14 @@ desc_fdir_processing_32b(volatile union i40e_rx_desc *rxdp,
 
 /* Force inline as some compilers will not inline by default. */
 static __rte_always_inline uint16_t
-_recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec_avx2(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts, uint8_t *split_packet)
 {
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct i40e_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union i40e_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ci_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 	rte_prefetch0(rxdp);
 
@@ -623,7 +623,7 @@ static uint16_t
 i40e_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index f0320a221c..e609b7c411 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -16,7 +16,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	i40e_rxq_rearm_common(rxq, true);
 }
@@ -29,7 +29,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
  * desc_idx: required to select the correct shift at compile time
  */
 static inline __m256i
-desc_fdir_processing_32b(volatile union i40e_rx_desc *rxdp,
+desc_fdir_processing_32b(volatile union ci_rx_desc *rxdp,
 			 struct rte_mbuf **rx_pkts,
 			 const uint32_t pkt_idx,
 			 const uint32_t desc_idx)
@@ -106,14 +106,14 @@ desc_fdir_processing_32b(volatile union i40e_rx_desc *rxdp,
 
 /* Force inline as some compilers will not inline by default. */
 static __rte_always_inline uint16_t
-_recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec_avx512(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			  uint16_t nb_pkts, uint8_t *split_packet)
 {
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct i40e_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union i40e_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ci_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
 
 	rte_prefetch0(rxdp);
 
@@ -691,7 +691,7 @@ i40e_recv_scattered_burst_vec_avx512(void *rx_queue,
 				     struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_common.h b/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
index ba72df8e13..d19b9e4bf4 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
@@ -21,7 +21,7 @@ i40e_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
 }
 
 static inline void
-_i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+_i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	const unsigned mask = rxq->nb_rx_desc - 1;
 	unsigned i;
@@ -68,7 +68,7 @@ i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
 	 */
 	ad->rx_vec_allowed = true;
 	for (uint16_t i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		if (!rxq)
 			continue;
 		if (!ci_rxq_vec_capable(rxq->nb_rx_desc, rxq->rx_free_thresh, rxq->offloads)) {
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index 955382652c..02ba03c290 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -17,12 +17,12 @@
 #include "i40e_rxtx_vec_common.h"
 
 static inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	uint64x2_t dma_addr0, dma_addr1;
 	uint64x2_t zero = vdupq_n_u64(0);
@@ -80,7 +80,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 /* NEON version of FDIR mark extraction for 4 32B descriptors at a time */
 static inline uint32x4_t
-descs_to_fdir_32b(volatile union i40e_rx_desc *rxdp, struct rte_mbuf **rx_pkt)
+descs_to_fdir_32b(volatile union ci_rx_desc *rxdp, struct rte_mbuf **rx_pkt)
 {
 	/* 32B descriptors: Load 2nd half of descriptors for FDIR ID data */
 	uint64x2_t desc0_qw23, desc1_qw23, desc2_qw23, desc3_qw23;
@@ -203,7 +203,7 @@ descs_to_fdir_16b(uint32x4_t fltstat, uint64x2_t descs[4], struct rte_mbuf **rx_
 #endif
 
 static inline void
-desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
+desc_to_olflags_v(struct ci_rx_queue *rxq, volatile union ci_rx_desc *rxdp,
 		  uint64x2_t descs[4], struct rte_mbuf **rx_pkts)
 {
 	uint32x4_t vlan0, vlan1, rss, l3_l4e;
@@ -332,15 +332,15 @@ desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **__rte_restrict rx_pkts,
  * - floor align nb_pkts to a I40E_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
+_recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		   struct rte_mbuf **__rte_restrict rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
-	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *sw_ring;
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	/* mask to shuffle from desc. to mbuf */
 	uint8x16_t shuf_msk = {
@@ -591,7 +591,7 @@ i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
 
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -737,13 +737,13 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
 }
 
 void __rte_cold
-i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+i40e_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 7e7f4c0895..6bafd96797 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -15,12 +15,12 @@
 #include <rte_vect.h>
 
 static inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
@@ -89,7 +89,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 /* SSE version of FDIR mark extraction for 4 32B descriptors at a time */
 static inline __m128i
-descs_to_fdir_32b(volatile union i40e_rx_desc *rxdp, struct rte_mbuf **rx_pkt)
+descs_to_fdir_32b(volatile union ci_rx_desc *rxdp, struct rte_mbuf **rx_pkt)
 {
 	/* 32B descriptors: Load 2nd half of descriptors for FDIR ID data */
 	__m128i desc0_qw23, desc1_qw23, desc2_qw23, desc3_qw23;
@@ -207,7 +207,7 @@ descs_to_fdir_16b(__m128i fltstat, __m128i descs[4], struct rte_mbuf **rx_pkt)
 #endif
 
 static inline void
-desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
+desc_to_olflags_v(struct ci_rx_queue *rxq, volatile union ci_rx_desc *rxdp,
 		  __m128i descs[4], struct rte_mbuf **rx_pkts)
 {
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -347,16 +347,16 @@ desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a I40E_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
-	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *sw_ring;
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
 	__m128i shuf_msk;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	__m128i crc_adjust = _mm_set_epi16(
 				0, 0, 0,    /* ignore non-length fields */
@@ -609,7 +609,7 @@ i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
 
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -755,13 +755,13 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+i40e_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 17/25] net/ice: use the common Rx queue structure
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanp Anatoly Burakov
                     ` (15 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 16/25] net/i40e: use the " Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 18/25] net/iavf: " Anatoly Burakov
                     ` (7 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Bruce Richardson

Make the ice driver use the new common Rx queue structure.

In addition to the 16-byte and 32-byte descriptors supported by other
drivers, which the ice driver is adjusted to use where necessary, the ice
driver also supports a flex descriptor format that is shared with some of
the other drivers, so add a common definition for the flex descriptor
formats as well.
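
As an illustration (not part of the diff, and the helper name below is
hypothetical), a driver is expected to pick the descriptor ring out of the
union in the common queue structure; for the flex format, assuming the
definitions added to common/rx.h and common/desc.h are included, that looks
roughly like:

	/* flex and fixed descriptor rings share storage in ci_rx_queue,
	 * so a queue uses exactly one of them, chosen by descriptor format */
	static inline volatile union ci_rx_flex_desc *
	example_next_flex_desc(struct ci_rx_queue *rxq)
	{
		return &rxq->rx_flex_ring[rxq->rx_tail];
	}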

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Separate some of the changes into other commits
    - Add a new common flex descriptor format definition
    
    v2:
    - Make xtr_field_offs of type ptrdiff_t instead of off_t to fix 32-bit compile
      issues

 drivers/net/intel/common/desc.h             |  68 ++++++++
 drivers/net/intel/common/rx.h               |  23 +++
 drivers/net/intel/ice/ice_dcf.c             |   3 +-
 drivers/net/intel/ice/ice_dcf_ethdev.c      |  25 ++-
 drivers/net/intel/ice/ice_ethdev.c          |   2 +-
 drivers/net/intel/ice/ice_ethdev.h          |   4 +-
 drivers/net/intel/ice/ice_rxtx.c            | 184 ++++++++++----------
 drivers/net/intel/ice/ice_rxtx.h            |  80 +--------
 drivers/net/intel/ice/ice_rxtx_common_avx.h |   8 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx2.c   |  14 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx512.c |  16 +-
 drivers/net/intel/ice/ice_rxtx_vec_common.h |   6 +-
 drivers/net/intel/ice/ice_rxtx_vec_sse.c    |  26 +--
 13 files changed, 241 insertions(+), 218 deletions(-)

diff --git a/drivers/net/intel/common/desc.h b/drivers/net/intel/common/desc.h
index f9e7f27991..dca265b5f6 100644
--- a/drivers/net/intel/common/desc.h
+++ b/drivers/net/intel/common/desc.h
@@ -35,6 +35,30 @@ union ci_rx_desc {
 		} qword1;
 	} wb;  /* writeback */
 };
+
+union ci_rx_flex_desc {
+	struct {
+		rte_le64_t pkt_addr; /* Packet buffer address */
+		rte_le64_t hdr_addr; /* Header buffer address */
+				 /* bit 0 of hdr_addr is DD bit */
+	} read;
+	struct {
+		/* Qword 0 */
+		uint8_t rxdid; /* descriptor builder profile ID */
+		uint8_t mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+		rte_le16_t ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+		rte_le16_t pkt_len; /* [15:14] are reserved */
+		rte_le16_t hdr_len_sph_flex_flags1; /* header=[10:0] */
+						/* sph=[11:11] */
+						/* ff1/ext=[15:12] */
+
+		/* Qword 1 */
+		rte_le16_t status_error0;
+		rte_le16_t l2tag1;
+		rte_le16_t flex_meta0;
+		rte_le16_t flex_meta1;
+	} wb; /* writeback */
+};
 #else
 union ci_rx_desc {
 	struct {
@@ -84,6 +108,50 @@ union ci_rx_desc {
 		} qword3;
 	} wb;  /* writeback */
 };
+
+union ci_rx_flex_desc {
+	struct {
+		rte_le64_t pkt_addr; /* Packet buffer address */
+		rte_le64_t hdr_addr; /* Header buffer address */
+				 /* bit 0 of hdr_addr is DD bit */
+		rte_le64_t rsvd1;
+		rte_le64_t rsvd2;
+	} read;
+	struct {
+		/* Qword 0 */
+		uint8_t rxdid; /* descriptor builder profile ID */
+		uint8_t mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+		rte_le16_t ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+		rte_le16_t pkt_len; /* [15:14] are reserved */
+		rte_le16_t hdr_len_sph_flex_flags1; /* header=[10:0] */
+						/* sph=[11:11] */
+						/* ff1/ext=[15:12] */
+
+		/* Qword 1 */
+		rte_le16_t status_error0;
+		rte_le16_t l2tag1;
+		rte_le16_t flex_meta0;
+		rte_le16_t flex_meta1;
+
+		/* Qword 2 */
+		rte_le16_t status_error1;
+		uint8_t flex_flags2;
+		uint8_t time_stamp_low;
+		rte_le16_t l2tag2_1st;
+		rte_le16_t l2tag2_2nd;
+
+		/* Qword 3 */
+		rte_le16_t flex_meta2;
+		rte_le16_t flex_meta3;
+		union {
+			struct {
+				rte_le16_t flex_meta4;
+				rte_le16_t flex_meta5;
+			} flex;
+			rte_le32_t ts_high;
+		} flex_ts;
+	} wb; /* writeback */
+};
 #endif
 
 #endif /* _COMMON_INTEL_DESC_H_ */
diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index 8da52fd78e..81b789e828 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -5,6 +5,7 @@
 #ifndef _COMMON_INTEL_RX_H_
 #define _COMMON_INTEL_RX_H_
 
+#include <stddef.h>
 #include <stdint.h>
 #include <unistd.h>
 #include <rte_mbuf.h>
@@ -13,6 +14,7 @@
 #include "desc.h"
 
 #define CI_RX_MAX_BURST 32
+#define CI_RX_MAX_NSEG 2
 
 struct ci_rx_queue;
 
@@ -24,6 +26,8 @@ struct ci_rx_entry_sc {
 	struct rte_mbuf *fbuf; /* First segment of the fragmented packet.*/
 };
 
+typedef void (*ci_rx_release_mbufs_t)(struct ci_rx_queue *rxq);
+
 /**
  * Structure associated with each RX queue.
  */
@@ -32,6 +36,7 @@ struct ci_rx_queue {
 	union { /* RX ring virtual address */
 		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
 		volatile union ci_rx_desc *rx_ring;
+		volatile union ci_rx_flex_desc *rx_flex_ring;
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
@@ -64,10 +69,16 @@ struct ci_rx_queue {
 	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
 	uint64_t mbuf_initializer; /**< value to init mbufs */
 	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
+	uint32_t rxdid; /**< RX descriptor format ID. */
+	uint32_t proto_xtr; /* protocol extraction type */
+	uint64_t xtr_ol_flag; /* flexible descriptor metadata extraction offload flag */
+	ptrdiff_t xtr_field_offs; /* Protocol extraction metadata offset */
+	uint64_t hw_time_update; /**< Last time HW timestamp was updated */
 	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
 	struct rte_mbuf fake_mbuf;
 	union { /* the VSI this queue belongs to */
 		struct i40e_vsi *i40e_vsi;
+		struct ice_vsi *ice_vsi;
 	};
 	const struct rte_memzone *mz;
 	union {
@@ -85,6 +96,18 @@ struct ci_rx_queue {
 			uint8_t hs_mode; /**< Header Split mode */
 			uint8_t dcb_tc; /**< Traffic class of rx queue */
 		};
+		struct { /* ice specific values */
+			ci_rx_release_mbufs_t rx_rel_mbufs; /**< release mbuf function */
+			/** holds buffer split information */
+			struct rte_eth_rxseg_split rxseg[CI_RX_MAX_NSEG];
+			struct ci_rx_entry *sw_split_buf; /**< Buffer split SW ring */
+			uint32_t rxseg_nb; /**< number of buffer split segments */
+			uint32_t time_high; /* high 32 bits of hardware timestamp register */
+			uint32_t hw_time_high; /* high 32 bits of timestamp */
+			uint32_t hw_time_low; /* low 32 bits of timestamp */
+			int ts_offset; /* dynamic mbuf timestamp field offset */
+			uint64_t ts_flag; /* dynamic mbuf timestamp flag */
+		};
 	};
 };
 
diff --git a/drivers/net/intel/ice/ice_dcf.c b/drivers/net/intel/ice/ice_dcf.c
index 2f7c239491..51716a4d5b 100644
--- a/drivers/net/intel/ice/ice_dcf.c
+++ b/drivers/net/intel/ice/ice_dcf.c
@@ -1175,8 +1175,7 @@ ice_dcf_init_rss(struct ice_dcf_hw *hw)
 int
 ice_dcf_configure_queues(struct ice_dcf_hw *hw)
 {
-	struct ice_rx_queue **rxq =
-		(struct ice_rx_queue **)hw->eth_dev->data->rx_queues;
+	struct ci_rx_queue **rxq = (struct ci_rx_queue **)hw->eth_dev->data->rx_queues;
 	struct ci_tx_queue **txq =
 		(struct ci_tx_queue **)hw->eth_dev->data->tx_queues;
 	struct virtchnl_vsi_queue_config_info *vc_config;
diff --git a/drivers/net/intel/ice/ice_dcf_ethdev.c b/drivers/net/intel/ice/ice_dcf_ethdev.c
index d3fd5d7122..88d943d432 100644
--- a/drivers/net/intel/ice/ice_dcf_ethdev.c
+++ b/drivers/net/intel/ice/ice_dcf_ethdev.c
@@ -106,7 +106,7 @@ ice_dcf_xmit_pkts(__rte_unused void *tx_queue,
 }
 
 static int
-ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ice_rx_queue *rxq)
+ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ci_rx_queue *rxq)
 {
 	struct ice_dcf_adapter *dcf_ad = dev->data->dev_private;
 	struct rte_eth_dev_data *dev_data = dev->data;
@@ -145,8 +145,7 @@ ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ice_rx_queue *rxq)
 static int
 ice_dcf_init_rx_queues(struct rte_eth_dev *dev)
 {
-	struct ice_rx_queue **rxq =
-		(struct ice_rx_queue **)dev->data->rx_queues;
+	struct ci_rx_queue **rxq = (struct ci_rx_queue **)dev->data->rx_queues;
 	int i, ret;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -282,9 +281,9 @@ ice_dcf_config_rx_queues_irqs(struct rte_eth_dev *dev,
 }
 
 static int
-alloc_rxq_mbufs(struct ice_rx_queue *rxq)
+alloc_rxq_mbufs(struct ci_rx_queue *rxq)
 {
-	volatile union ice_rx_flex_desc *rxd;
+	volatile union ci_rx_flex_desc *rxd;
 	struct rte_mbuf *mbuf = NULL;
 	uint64_t dma_addr;
 	uint16_t i;
@@ -305,7 +304,7 @@ alloc_rxq_mbufs(struct ice_rx_queue *rxq)
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
-		rxd = &rxq->rx_ring[i];
+		rxd = &rxq->rx_flex_ring[i];
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
@@ -324,7 +323,7 @@ ice_dcf_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ice_dcf_adapter *ad = dev->data->dev_private;
 	struct iavf_hw *hw = &ad->real_hw.avf;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err = 0;
 
 	if (rx_queue_id >= dev->data->nb_rx_queues)
@@ -358,7 +357,7 @@ ice_dcf_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 }
 
 static inline void
-reset_rx_queue(struct ice_rx_queue *rxq)
+reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	uint16_t len;
 	uint32_t i;
@@ -368,8 +367,8 @@ reset_rx_queue(struct ice_rx_queue *rxq)
 
 	len = rxq->nb_rx_desc + ICE_RX_MAX_BURST;
 
-	for (i = 0; i < len * sizeof(union ice_rx_flex_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+	for (i = 0; i < len * sizeof(union ci_rx_flex_desc); i++)
+		((volatile char *)rxq->rx_flex_ring)[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 
@@ -429,7 +428,7 @@ ice_dcf_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ice_dcf_adapter *ad = dev->data->dev_private;
 	struct ice_dcf_hw *hw = &ad->real_hw;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 
 	if (rx_queue_id >= dev->data->nb_rx_queues)
@@ -511,7 +510,7 @@ ice_dcf_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 static int
 ice_dcf_start_queues(struct rte_eth_dev *dev)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int nb_rxq = 0;
 	int nb_txq, i;
@@ -638,7 +637,7 @@ ice_dcf_stop_queues(struct rte_eth_dev *dev)
 {
 	struct ice_dcf_adapter *ad = dev->data->dev_private;
 	struct ice_dcf_hw *hw = &ad->real_hw;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int ret, i;
 
diff --git a/drivers/net/intel/ice/ice_ethdev.c b/drivers/net/intel/ice/ice_ethdev.c
index 7cc083ca32..938c89e773 100644
--- a/drivers/net/intel/ice/ice_ethdev.c
+++ b/drivers/net/intel/ice/ice_ethdev.c
@@ -6724,7 +6724,7 @@ ice_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct ice_adapter *ad =
 			ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t ts_high;
 	uint64_t ts_ns;
 
diff --git a/drivers/net/intel/ice/ice_ethdev.h b/drivers/net/intel/ice/ice_ethdev.h
index bfe093afca..8e5799f8b4 100644
--- a/drivers/net/intel/ice/ice_ethdev.h
+++ b/drivers/net/intel/ice/ice_ethdev.h
@@ -257,7 +257,7 @@ struct ice_vsi_list {
 	struct ice_vsi *vsi;
 };
 
-struct ice_rx_queue;
+struct ci_rx_queue;
 struct ci_tx_queue;
 
 
@@ -425,7 +425,7 @@ struct ice_fdir_counter_pool_container {
 struct ice_fdir_info {
 	struct ice_vsi *fdir_vsi;     /* pointer to fdir VSI structure */
 	struct ci_tx_queue *txq;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	void *prg_pkt;                 /* memory for fdir program packet */
 	uint64_t dma_addr;             /* physic address of packet memory*/
 	const struct rte_memzone *mz;
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 19569b6a38..e2fcc31d0d 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -36,12 +36,12 @@ ice_monitor_callback(const uint64_t value,
 int
 ice_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_queue *rxq = rx_queue;
+	volatile union ci_rx_flex_desc *rxdp;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = &rxq->rx_flex_ring[desc];
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.status_error0;
 
@@ -73,9 +73,9 @@ ice_proto_xtr_type_to_rxdid(uint8_t xtr_type)
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ci_rx_queue *rxq,
 				       struct rte_mbuf *mb,
-				       volatile union ice_rx_flex_desc *rxdp)
+				       volatile union ci_rx_flex_desc *rxdp)
 {
 	volatile struct ice_32b_rx_flex_desc_comms *desc =
 			(volatile struct ice_32b_rx_flex_desc_comms *)rxdp;
@@ -95,9 +95,9 @@ ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ice_rx_queue *rxq,
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ci_rx_queue *rxq,
 				   struct rte_mbuf *mb,
-				   volatile union ice_rx_flex_desc *rxdp)
+				   volatile union ci_rx_flex_desc *rxdp)
 {
 	volatile struct ice_32b_rx_flex_desc_comms_ovs *desc =
 			(volatile struct ice_32b_rx_flex_desc_comms_ovs *)rxdp;
@@ -120,9 +120,9 @@ ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ice_rx_queue *rxq,
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ci_rx_queue *rxq,
 				      struct rte_mbuf *mb,
-				      volatile union ice_rx_flex_desc *rxdp)
+				      volatile union ci_rx_flex_desc *rxdp)
 {
 	volatile struct ice_32b_rx_flex_desc_comms *desc =
 			(volatile struct ice_32b_rx_flex_desc_comms *)rxdp;
@@ -164,9 +164,9 @@ ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ice_rx_queue *rxq,
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_aux_v2(struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_aux_v2(struct ci_rx_queue *rxq,
 				      struct rte_mbuf *mb,
-				      volatile union ice_rx_flex_desc *rxdp)
+				      volatile union ci_rx_flex_desc *rxdp)
 {
 	volatile struct ice_32b_rx_flex_desc_comms *desc =
 			(volatile struct ice_32b_rx_flex_desc_comms *)rxdp;
@@ -215,7 +215,7 @@ static const ice_rxd_to_pkt_fields_t rxd_to_pkt_fields_ops[] = {
 };
 
 void
-ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq, uint32_t rxdid)
+ice_select_rxd_to_pkt_fields_handler(struct ci_rx_queue *rxq, uint32_t rxdid)
 {
 	rxq->rxdid = rxdid;
 
@@ -243,17 +243,17 @@ ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq, uint32_t rxdid)
 }
 
 static int
-ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
+ice_program_hw_rx_queue(struct ci_rx_queue *rxq)
 {
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
 	struct ice_pf *pf = ICE_VSI_TO_PF(vsi);
-	struct rte_eth_dev_data *dev_data = rxq->vsi->adapter->pf.dev_data;
+	struct rte_eth_dev_data *dev_data = rxq->ice_vsi->adapter->pf.dev_data;
 	struct ice_rlan_ctx rx_ctx;
 	uint16_t buf_size;
 	uint32_t rxdid = ICE_RXDID_COMMS_OVS;
 	uint32_t regval;
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 	uint32_t frame_size = dev_data->mtu + ICE_ETH_OVERHEAD;
 	int err;
 
@@ -451,15 +451,15 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
 
 /* Allocate mbufs for all descriptors in rx queue */
 static int
-ice_alloc_rx_queue_mbufs(struct ice_rx_queue *rxq)
+ice_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq)
 {
-	struct ice_rx_entry *rxe = rxq->sw_ring;
+	struct ci_rx_entry *rxe = rxq->sw_ring;
 	uint64_t dma_addr;
 	uint16_t i;
 
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
-		volatile union ice_rx_flex_desc *rxd;
-		rxd = &rxq->rx_ring[i];
+		volatile union ci_rx_flex_desc *rxd;
+		rxd = &rxq->rx_flex_ring[i];
 		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mp);
 
 		if (unlikely(!mbuf)) {
@@ -513,7 +513,7 @@ ice_alloc_rx_queue_mbufs(struct ice_rx_queue *rxq)
 
 /* Free all mbufs for descriptors in rx queue */
 static void
-_ice_rx_queue_release_mbufs(struct ice_rx_queue *rxq)
+_ice_rx_queue_release_mbufs(struct ci_rx_queue *rxq)
 {
 	uint16_t i;
 
@@ -590,7 +590,7 @@ ice_switch_rx_queue(struct ice_hw *hw, uint16_t q_idx, bool on)
 }
 
 static inline int
-ice_check_rx_burst_bulk_alloc_preconditions(struct ice_rx_queue *rxq)
+ice_check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
 {
 	int ret = 0;
 
@@ -617,9 +617,9 @@ ice_check_rx_burst_bulk_alloc_preconditions(struct ice_rx_queue *rxq)
 	return ret;
 }
 
-/* reset fields in ice_rx_queue back to default */
+/* reset fields in ci_rx_queue back to default */
 static void
-ice_reset_rx_queue(struct ice_rx_queue *rxq)
+ice_reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	unsigned int i;
 	uint16_t len;
@@ -631,8 +631,8 @@ ice_reset_rx_queue(struct ice_rx_queue *rxq)
 
 	len = (uint16_t)(rxq->nb_rx_desc + ICE_RX_MAX_BURST);
 
-	for (i = 0; i < len * sizeof(union ice_rx_flex_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+	for (i = 0; i < len * sizeof(union ci_rx_flex_desc); i++)
+		((volatile char *)rxq->rx_flex_ring)[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 	for (i = 0; i < ICE_RX_MAX_BURST; ++i)
@@ -654,7 +654,7 @@ ice_reset_rx_queue(struct ice_rx_queue *rxq)
 int
 ice_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -714,7 +714,7 @@ ice_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 int
 ice_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -833,9 +833,9 @@ ice_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 }
 
 static int
-ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
+ice_fdir_program_hw_rx_queue(struct ci_rx_queue *rxq)
 {
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
 	uint32_t rxdid = ICE_RXDID_LEGACY_1;
 	struct ice_rlan_ctx rx_ctx;
@@ -908,7 +908,7 @@ ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
 int
 ice_fdir_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
@@ -1098,7 +1098,7 @@ ice_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 int
 ice_fdir_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
@@ -1169,7 +1169,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 	struct ice_adapter *ad =
 		ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct ice_vsi *vsi = pf->main_vsi;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz;
 	uint32_t ring_size, tlen;
 	uint16_t len;
@@ -1205,7 +1205,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate the rx queue data structure */
 	rxq = rte_zmalloc_socket(NULL,
-				 sizeof(struct ice_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 socket_id);
 
@@ -1239,7 +1239,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 		rxq->crc_len = 0;
 
 	rxq->drop_en = rx_conf->rx_drop_en;
-	rxq->vsi = vsi;
+	rxq->ice_vsi = vsi;
 	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 	rxq->proto_xtr = pf->proto_xtr != NULL ?
 			 pf->proto_xtr[queue_idx] : PROTO_XTR_NONE;
@@ -1258,7 +1258,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 	len += ICE_RX_MAX_BURST;
 
 	/* Allocate the maximum number of RX ring hardware descriptor. */
-	ring_size = sizeof(union ice_rx_flex_desc) * len;
+	ring_size = sizeof(union ci_rx_flex_desc) * len;
 	ring_size = RTE_ALIGN(ring_size, ICE_DMA_MEM_ALIGN);
 	rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
 				      ring_size, ICE_RING_BASE_ALIGN,
@@ -1274,7 +1274,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 	memset(rz->addr, 0, ring_size);
 
 	rxq->rx_ring_phys_addr = rz->iova;
-	rxq->rx_ring = rz->addr;
+	rxq->rx_flex_ring = rz->addr;
 
 	/* always reserve more for bulk alloc */
 	len = (uint16_t)(nb_desc + ICE_RX_MAX_BURST);
@@ -1286,7 +1286,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate the software ring. */
 	rxq->sw_ring = rte_zmalloc_socket(NULL,
-					  sizeof(struct ice_rx_entry) * tlen,
+					  sizeof(struct ci_rx_entry) * tlen,
 					  RTE_CACHE_LINE_SIZE,
 					  socket_id);
 	if (!rxq->sw_ring) {
@@ -1323,7 +1323,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 void
 ice_rx_queue_release(void *rxq)
 {
-	struct ice_rx_queue *q = (struct ice_rx_queue *)rxq;
+	struct ci_rx_queue *q = (struct ci_rx_queue *)rxq;
 
 	if (!q) {
 		PMD_DRV_LOG(DEBUG, "Pointer to rxq is NULL");
@@ -1547,7 +1547,7 @@ void
 ice_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 		 struct rte_eth_rxq_info *qinfo)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -1584,12 +1584,12 @@ uint32_t
 ice_rx_queue_count(void *rx_queue)
 {
 #define ICE_RXQ_SCAN_INTERVAL 4
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_queue *rxq;
+	volatile union ci_rx_flex_desc *rxdp;
+	struct ci_rx_queue *rxq;
 	uint16_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = &rxq->rx_flex_ring[rxq->rx_tail];
 	while ((desc < rxq->nb_rx_desc) &&
 	       rte_le_to_cpu_16(rxdp->wb.status_error0) &
 	       (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)) {
@@ -1601,8 +1601,7 @@ ice_rx_queue_count(void *rx_queue)
 		desc += ICE_RXQ_SCAN_INTERVAL;
 		rxdp += ICE_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
-				 desc - rxq->nb_rx_desc]);
+			rxdp = &rxq->rx_flex_ring[rxq->rx_tail + desc - rxq->nb_rx_desc];
 	}
 
 	return desc;
@@ -1655,7 +1654,7 @@ ice_rxd_error_to_pkt_flags(uint16_t stat_err0)
 }
 
 static inline void
-ice_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union ice_rx_flex_desc *rxdp)
+ice_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union ci_rx_flex_desc *rxdp)
 {
 	if (rte_le_to_cpu_16(rxdp->wb.status_error0) &
 	    (1 << ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S)) {
@@ -1694,25 +1693,25 @@ ice_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union ice_rx_flex_desc *rxdp)
 #define ICE_PTP_TS_VALID 0x1
 
 static inline int
-ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
+ice_rx_scan_hw_ring(struct ci_rx_queue *rxq)
 {
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep;
+	volatile union ci_rx_flex_desc *rxdp;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t stat_err0;
 	uint16_t pkt_len, hdr_len;
 	int32_t s[ICE_LOOK_AHEAD], nb_dd;
 	int32_t i, j, nb_rx = 0;
 	uint64_t pkt_flags = 0;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 #endif
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = &rxq->rx_flex_ring[rxq->rx_tail];
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -1842,7 +1841,7 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 }
 
 static inline uint16_t
-ice_rx_fill_from_stage(struct ice_rx_queue *rxq,
+ice_rx_fill_from_stage(struct ci_rx_queue *rxq,
 		       struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts)
 {
@@ -1861,10 +1860,10 @@ ice_rx_fill_from_stage(struct ice_rx_queue *rxq,
 }
 
 static inline int
-ice_rx_alloc_bufs(struct ice_rx_queue *rxq)
+ice_rx_alloc_bufs(struct ci_rx_queue *rxq)
 {
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep;
+	volatile union ci_rx_flex_desc *rxdp;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx, i;
 	uint64_t dma_addr;
@@ -1893,7 +1892,7 @@ ice_rx_alloc_bufs(struct ice_rx_queue *rxq)
 		}
 	}
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = &rxq->rx_flex_ring[alloc_idx];
 	for (i = 0; i < rxq->rx_free_thresh; i++) {
 		if (likely(i < (rxq->rx_free_thresh - 1)))
 			/* Prefetch next mbuf */
@@ -1932,7 +1931,7 @@ ice_rx_alloc_bufs(struct ice_rx_queue *rxq)
 static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = (struct ice_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	uint16_t nb_rx = 0;
 
 	if (!nb_pkts)
@@ -1950,7 +1949,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		if (ice_rx_alloc_bufs(rxq) != 0) {
 			uint16_t i, j;
 
-			rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed +=
+			rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed +=
 				rxq->rx_free_thresh;
 			PMD_RX_LOG(DEBUG, "Rx mbuf alloc failed for "
 				   "port_id=%u, queue_id=%u",
@@ -2005,12 +2004,12 @@ ice_recv_scattered_pkts(void *rx_queue,
 			struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
-	volatile union ice_rx_flex_desc *rx_ring = rxq->rx_ring;
-	volatile union ice_rx_flex_desc *rxdp;
-	union ice_rx_flex_desc rxd;
-	struct ice_rx_entry *sw_ring = rxq->sw_ring;
-	struct ice_rx_entry *rxe;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ci_rx_flex_desc *rx_ring = rxq->rx_flex_ring;
+	volatile union ci_rx_flex_desc *rxdp;
+	union ci_rx_flex_desc rxd;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb; /* new allocated mbuf */
@@ -2022,13 +2021,13 @@ ice_recv_scattered_pkts(void *rx_queue,
 	uint16_t rx_stat_err0;
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -2049,7 +2048,7 @@ ice_recv_scattered_pkts(void *rx_queue,
 		/* allocate mbuf */
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
+			rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
 			break;
 		}
 		rxd = *rxdp; /* copy descriptor in ring to temp variable*/
@@ -2317,8 +2316,8 @@ ice_dev_supported_ptypes_get(struct rte_eth_dev *dev, size_t *no_of_elements)
 int
 ice_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_queue *rxq = rx_queue;
+	volatile union ci_rx_flex_desc *rxdp;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint32_t desc;
 
 	if (unlikely(offset >= rxq->nb_rx_desc))
@@ -2331,7 +2330,7 @@ ice_rx_descriptor_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = &rxq->rx_flex_ring[desc];
 	if (rte_le_to_cpu_16(rxdp->wb.status_error0) &
 	    (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S))
 		return RTE_ETH_RX_DESC_DONE;
@@ -2458,7 +2457,7 @@ ice_fdir_setup_tx_resources(struct ice_pf *pf)
 int
 ice_fdir_setup_rx_resources(struct ice_pf *pf)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz = NULL;
 	uint32_t ring_size;
 	struct rte_eth_dev *dev;
@@ -2472,7 +2471,7 @@ ice_fdir_setup_rx_resources(struct ice_pf *pf)
 
 	/* Allocate the RX queue data structure. */
 	rxq = rte_zmalloc_socket("ice fdir rx queue",
-				 sizeof(struct ice_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 SOCKET_ID_ANY);
 	if (!rxq) {
@@ -2498,12 +2497,12 @@ ice_fdir_setup_rx_resources(struct ice_pf *pf)
 	rxq->nb_rx_desc = ICE_FDIR_NUM_RX_DESC;
 	rxq->queue_id = ICE_FDIR_QUEUE_ID;
 	rxq->reg_idx = pf->fdir.fdir_vsi->base_queue;
-	rxq->vsi = pf->fdir.fdir_vsi;
+	rxq->ice_vsi = pf->fdir.fdir_vsi;
 
 	rxq->rx_ring_phys_addr = rz->iova;
 	memset(rz->addr, 0, ICE_FDIR_NUM_RX_DESC *
 	       sizeof(union ice_32byte_rx_desc));
-	rxq->rx_ring = (union ice_rx_flex_desc *)rz->addr;
+	rxq->rx_flex_ring = (union ci_rx_flex_desc *)rz->addr;
 
 	/*
 	 * Don't need to allocate software ring and reset for the fdir
@@ -2522,12 +2521,12 @@ ice_recv_pkts(void *rx_queue,
 	      struct rte_mbuf **rx_pkts,
 	      uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
-	volatile union ice_rx_flex_desc *rx_ring = rxq->rx_ring;
-	volatile union ice_rx_flex_desc *rxdp;
-	union ice_rx_flex_desc rxd;
-	struct ice_rx_entry *sw_ring = rxq->sw_ring;
-	struct ice_rx_entry *rxe;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ci_rx_flex_desc *rx_ring = rxq->rx_flex_ring;
+	volatile union ci_rx_flex_desc *rxdp;
+	union ci_rx_flex_desc rxd;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *nmb; /* new allocated mbuf */
 	struct rte_mbuf *nmb_pay; /* new allocated payload mbuf */
 	struct rte_mbuf *rxm; /* pointer to store old mbuf in SW ring */
@@ -2539,13 +2538,13 @@ ice_recv_pkts(void *rx_queue,
 	uint16_t rx_stat_err0;
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -2566,7 +2565,7 @@ ice_recv_pkts(void *rx_queue,
 		/* allocate header mbuf */
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
+			rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
 			break;
 		}
 
@@ -2593,7 +2592,7 @@ ice_recv_pkts(void *rx_queue,
 			/* allocate payload mbuf */
 			nmb_pay = rte_mbuf_raw_alloc(rxq->rxseg[1].mp);
 			if (unlikely(!nmb_pay)) {
-				rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
+				rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
 				rxe->mbuf = NULL;
 				nb_hold--;
 				if (unlikely(rx_id == 0))
@@ -3471,7 +3470,7 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 	struct ice_adapter *ad =
 		ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 #ifdef RTE_ARCH_X86
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int i;
 	int rx_check_ret = -1;
 
@@ -4633,7 +4632,7 @@ ice_set_default_ptype_table(struct rte_eth_dev *dev)
  * tx queue
  */
 static inline int
-ice_check_fdir_programming_status(struct ice_rx_queue *rxq)
+ice_check_fdir_programming_status(struct ci_rx_queue *rxq)
 {
 	volatile union ice_32byte_rx_desc *rxdp;
 	uint64_t qword1;
@@ -4642,8 +4641,7 @@ ice_check_fdir_programming_status(struct ice_rx_queue *rxq)
 	uint32_t id;
 	int ret = -EAGAIN;
 
-	rxdp = (volatile union ice_32byte_rx_desc *)
-		(&rxq->rx_ring[rxq->rx_tail]);
+	rxdp = (volatile union ice_32byte_rx_desc *)&rxq->rx_flex_ring[rxq->rx_tail];
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
 	rx_status = (qword1 & ICE_RXD_QW1_STATUS_M)
 			>> ICE_RXD_QW1_STATUS_S;
@@ -4688,7 +4686,7 @@ int
 ice_fdir_programming(struct ice_pf *pf, struct ice_fltr_desc *fdir_desc)
 {
 	struct ci_tx_queue *txq = pf->fdir.txq;
-	struct ice_rx_queue *rxq = pf->fdir.rxq;
+	struct ci_rx_queue *rxq = pf->fdir.rxq;
 	volatile struct ice_fltr_desc *fdirdp;
 	volatile struct ice_tx_desc *txdp;
 	uint32_t td_cmd;
diff --git a/drivers/net/intel/ice/ice_rxtx.h b/drivers/net/intel/ice/ice_rxtx.h
index 52c753ba7c..62f98579f5 100644
--- a/drivers/net/intel/ice/ice_rxtx.h
+++ b/drivers/net/intel/ice/ice_rxtx.h
@@ -5,6 +5,7 @@
 #ifndef _ICE_RXTX_H_
 #define _ICE_RXTX_H_
 
+#include "../common/rx.h"
 #include "../common/tx.h"
 #include "ice_ethdev.h"
 
@@ -14,21 +15,15 @@
 #define ICE_DMA_MEM_ALIGN    4096
 #define ICE_RING_BASE_ALIGN  128
 
-#define ICE_RX_MAX_BURST 32
+#define ICE_RX_MAX_BURST CI_RX_MAX_BURST
 #define ICE_TX_MAX_BURST 32
 
 /* Maximal number of segments to split. */
-#define ICE_RX_MAX_NSEG 2
+#define ICE_RX_MAX_NSEG CI_RX_MAX_NSEG
 
 #define ICE_CHK_Q_ENA_COUNT        100
 #define ICE_CHK_Q_ENA_INTERVAL_US  100
 
-#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
-#define ice_rx_flex_desc ice_16b_rx_flex_desc
-#else
-#define ice_rx_flex_desc ice_32b_rx_flex_desc
-#endif
-
 #define ICE_SUPPORT_CHAIN_NUM 5
 
 #define ICE_TD_CMD                      ICE_TX_DESC_CMD_EOP
@@ -75,14 +70,9 @@
 
 #define ICE_TX_MTU_SEG_MAX	8
 
-typedef void (*ice_rx_release_mbufs_t)(struct ice_rx_queue *rxq);
-typedef void (*ice_rxd_to_pkt_fields_t)(struct ice_rx_queue *rxq,
+typedef void (*ice_rxd_to_pkt_fields_t)(struct ci_rx_queue *rxq,
 					struct rte_mbuf *mb,
-					volatile union ice_rx_flex_desc *rxdp);
-
-struct ice_rx_entry {
-	struct rte_mbuf *mbuf;
-};
+					volatile union ci_rx_flex_desc *rxdp);
 
 enum ice_rx_dtype {
 	ICE_RX_DTYPE_NO_SPLIT       = 0,
@@ -90,60 +80,6 @@ enum ice_rx_dtype {
 	ICE_RX_DTYPE_SPLIT_ALWAYS   = 2,
 };
 
-struct ice_rx_queue {
-	struct rte_mempool *mp; /* mbuf pool to populate RX ring */
-	volatile union ice_rx_flex_desc *rx_ring;/* RX ring virtual address */
-	rte_iova_t rx_ring_phys_addr; /* RX ring DMA address */
-	struct ice_rx_entry *sw_ring; /* address of RX soft ring */
-	uint16_t nb_rx_desc; /* number of RX descriptors */
-	uint16_t rx_free_thresh; /* max free RX desc to hold */
-	uint16_t rx_tail; /* current value of tail */
-	uint16_t nb_rx_hold; /* number of held free RX desc */
-	struct rte_mbuf *pkt_first_seg; /**< first segment of current packet */
-	struct rte_mbuf *pkt_last_seg; /**< last segment of current packet */
-	uint16_t rx_nb_avail; /**< number of staged packets ready */
-	uint16_t rx_next_avail; /**< index of next staged packets */
-	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	struct rte_mbuf fake_mbuf; /**< dummy mbuf */
-	struct rte_mbuf *rx_stage[ICE_RX_MAX_BURST * 2];
-
-	uint16_t rxrearm_nb;	/**< number of remaining to be re-armed */
-	uint16_t rxrearm_start;	/**< the idx we start the re-arming from */
-	uint64_t mbuf_initializer; /**< value to init mbufs */
-
-	uint16_t port_id; /* device port ID */
-	uint8_t crc_len; /* 0 if CRC stripped, 4 otherwise */
-	uint8_t fdir_enabled; /* 0 if FDIR disabled, 1 when enabled */
-	uint16_t queue_id; /* RX queue index */
-	uint16_t reg_idx; /* RX queue register index */
-	uint8_t drop_en; /* if not 0, set register bit */
-	volatile uint8_t *qrx_tail; /* register address of tail */
-	struct ice_vsi *vsi; /* the VSI this queue belongs to */
-	uint16_t rx_buf_len; /* The packet buffer size */
-	uint16_t rx_hdr_len; /* The header buffer size */
-	uint16_t max_pkt_len; /* Maximum packet length */
-	bool q_set; /* indicate if rx queue has been configured */
-	bool rx_deferred_start; /* don't start this queue in dev start */
-	uint8_t proto_xtr; /* Protocol extraction from flexible descriptor */
-	int xtr_field_offs; /*Protocol extraction matedata offset*/
-	uint64_t xtr_ol_flag; /* Protocol extraction offload flag */
-	uint32_t rxdid; /* Receive Flex Descriptor profile ID */
-	ice_rx_release_mbufs_t rx_rel_mbufs;
-	uint64_t offloads;
-	uint32_t time_high;
-	uint32_t hw_register_set;
-	const struct rte_memzone *mz;
-	uint32_t hw_time_high; /* high 32 bits of timestamp */
-	uint32_t hw_time_low; /* low 32 bits of timestamp */
-	uint64_t hw_time_update; /* SW time of HW record updating */
-	struct ice_rx_entry *sw_split_buf;
-	/* address of temp buffer for RX split mbufs */
-	struct rte_eth_rxseg_split rxseg[ICE_RX_MAX_NSEG];
-	uint32_t rxseg_nb;
-	int ts_offset; /* dynamic mbuf timestamp field offset */
-	uint64_t ts_flag; /* dynamic mbuf timestamp flag */
-};
-
 /* Offload features */
 union ice_tx_offload {
 	uint64_t data;
@@ -247,12 +183,12 @@ int ice_tx_descriptor_status(void *tx_queue, uint16_t offset);
 void ice_set_default_ptype_table(struct rte_eth_dev *dev);
 const uint32_t *ice_dev_supported_ptypes_get(struct rte_eth_dev *dev,
 					     size_t *no_of_elements);
-void ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq,
+void ice_select_rxd_to_pkt_fields_handler(struct ci_rx_queue *rxq,
 					  uint32_t rxdid);
 
 int ice_rx_vec_dev_check(struct rte_eth_dev *dev);
 int ice_tx_vec_dev_check(struct rte_eth_dev *dev);
-int ice_rxq_vec_setup(struct ice_rx_queue *rxq);
+int ice_rxq_vec_setup(struct ci_rx_queue *rxq);
 int ice_txq_vec_setup(struct ci_tx_queue *txq);
 uint16_t ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   uint16_t nb_pkts);
@@ -297,7 +233,7 @@ int ice_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc);
 #define FDIR_PARSING_ENABLE_PER_QUEUE(ad, on) do { \
 	int i; \
 	for (i = 0; i < (ad)->pf.dev_data->nb_rx_queues; i++) { \
-		struct ice_rx_queue *rxq = (ad)->pf.dev_data->rx_queues[i]; \
+		struct ci_rx_queue *rxq = (ad)->pf.dev_data->rx_queues[i]; \
 		if (!rxq) \
 			continue; \
 		rxq->fdir_enabled = on; \
diff --git a/drivers/net/intel/ice/ice_rxtx_common_avx.h b/drivers/net/intel/ice/ice_rxtx_common_avx.h
index d1c772bf06..7c65e7ed4d 100644
--- a/drivers/net/intel/ice/ice_rxtx_common_avx.h
+++ b/drivers/net/intel/ice/ice_rxtx_common_avx.h
@@ -9,14 +9,14 @@
 
 #ifdef __AVX2__
 static __rte_always_inline void
-ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
+ice_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
 {
 	int i;
 	uint16_t rx_id;
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile union ci_rx_flex_desc *rxdp;
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->rx_flex_ring + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
index 5ed669fc30..5b1a13dd22 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
@@ -8,7 +8,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-ice_rxq_rearm(struct ice_rx_queue *rxq)
+ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	ice_rxq_rearm_common(rxq, false);
 }
@@ -33,15 +33,15 @@ ice_flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_ice_recv_raw_pkts_vec_avx2(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts, uint8_t *split_packet,
 			    bool offload)
 {
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct ice_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union ice_rx_flex_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ci_rx_flex_desc *rxdp = rxq->rx_flex_ring + rxq->rx_tail;
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 
 	rte_prefetch0(rxdp);
@@ -443,7 +443,7 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			 * needs to load 2nd 16B of each desc for RSS hash parsing,
 			 * will cause performance drop to get into this context.
 			 */
-			if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+			if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
 					RTE_ETH_RX_OFFLOAD_RSS_HASH) {
 				/* load bottom half of every 32B desc */
 				const __m128i raw_desc_bh7 = _mm_load_si128
@@ -692,7 +692,7 @@ static __rte_always_inline uint16_t
 ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				  uint16_t nb_pkts, bool offload)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
index e52e9e9ceb..b943caf0f0 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
@@ -8,7 +8,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-ice_rxq_rearm(struct ice_rx_queue *rxq)
+ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	ice_rxq_rearm_common(rxq, true);
 }
@@ -33,17 +33,17 @@ ice_flex_rxd_to_fdir_flags_vec_avx512(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
+_ice_recv_raw_pkts_vec_avx512(struct ci_rx_queue *rxq,
 			      struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts,
 			      uint8_t *split_packet,
 			      bool do_offload)
 {
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct ice_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union ice_rx_flex_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ci_rx_flex_desc *rxdp = rxq->rx_flex_ring + rxq->rx_tail;
 
 	rte_prefetch0(rxdp);
 
@@ -465,7 +465,7 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
 			 * needs to load 2nd 16B of each desc for RSS hash parsing,
 			 * will cause performance drop to get into this context.
 			 */
-			if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+			if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
 					RTE_ETH_RX_OFFLOAD_RSS_HASH) {
 				/* load bottom half of every 32B desc */
 				const __m128i raw_desc_bh7 = _mm_load_si128
@@ -721,7 +721,7 @@ static uint16_t
 ice_recv_scattered_burst_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				    uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -763,7 +763,7 @@ ice_recv_scattered_burst_vec_avx512_offload(void *rx_queue,
 					    struct rte_mbuf **rx_pkts,
 					    uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_common.h b/drivers/net/intel/ice/ice_rxtx_vec_common.h
index 7933c26366..9430a99ba5 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_common.h
+++ b/drivers/net/intel/ice/ice_rxtx_vec_common.h
@@ -17,7 +17,7 @@ ice_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
 }
 
 static inline void
-_ice_rx_queue_release_mbufs_vec(struct ice_rx_queue *rxq)
+_ice_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	const unsigned int mask = rxq->nb_rx_desc - 1;
 	unsigned int i;
@@ -79,7 +79,7 @@ _ice_rx_queue_release_mbufs_vec(struct ice_rx_queue *rxq)
 #define ICE_VECTOR_OFFLOAD_PATH	1
 
 static inline int
-ice_rx_vec_queue_default(struct ice_rx_queue *rxq)
+ice_rx_vec_queue_default(struct ci_rx_queue *rxq)
 {
 	if (!rxq)
 		return -1;
@@ -119,7 +119,7 @@ static inline int
 ice_rx_vec_dev_check_default(struct rte_eth_dev *dev)
 {
 	int i;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int ret = 0;
 	int result = 0;
 
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_sse.c b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
index 36da5b5d1b..cae2188279 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
@@ -26,18 +26,18 @@ ice_flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 }
 
 static inline void
-ice_rxq_rearm(struct ice_rx_queue *rxq)
+ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile union ci_rx_flex_desc *rxdp;
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 					  RTE_PKTMBUF_HEADROOM);
 	__m128i dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->rx_flex_ring + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -105,7 +105,7 @@ ice_rxq_rearm(struct ice_rx_queue *rxq)
 }
 
 static inline void
-ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
+ice_rx_desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4],
 			 struct rte_mbuf **rx_pkts)
 {
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -301,15 +301,15 @@ ice_rx_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a ICE_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_ice_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts, uint8_t *split_packet)
 {
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *sw_ring;
+	volatile union ci_rx_flex_desc *rxdp;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 	__m128i crc_adjust = _mm_set_epi16
 				(0, 0, 0,       /* ignore non-length fields */
 				 -rxq->crc_len, /* sub crc on data_len */
@@ -361,7 +361,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = rxq->rx_flex_ring + rxq->rx_tail;
 
 	rte_prefetch0(rxdp);
 
@@ -482,7 +482,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		 * needs to load 2nd 16B of each desc for RSS hash parsing,
 		 * will cause performance drop to get into this context.
 		 */
-		if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+		if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
 				RTE_ETH_RX_OFFLOAD_RSS_HASH) {
 			/* load bottom half of every 32B desc */
 			const __m128i raw_desc_bh3 =
@@ -608,7 +608,7 @@ static uint16_t
 ice_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -779,7 +779,7 @@ ice_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 int __rte_cold
-ice_rxq_vec_setup(struct ice_rx_queue *rxq)
+ice_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	if (!rxq)
 		return -1;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 18/25] net/iavf: use the common Rx queue structure
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (16 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 17/25] net/ice: " Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 19/25] net/intel: generalize vectorized Rx rearm Anatoly Burakov
                     ` (6 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Bruce Richardson, Vladimir Medvedkin, Ian Stokes

Make the iavf driver use the new common Rx queue structure.

The iavf driver supports both 16-byte and 32-byte descriptors, in both
regular and flex formats, so replace all usages of iavf-specific versions
of these descriptors with the common ones.
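
As an illustration of what the conversion targets, below is a minimal,
self-contained C sketch of the layout used by the common structure. All
names prefixed with "demo_" are simplified stand-ins invented for this
sketch; the real definitions live in drivers/net/intel/common/rx.h and
carry many more fields. The key points are that the descriptor ring is
stored as a union, so a queue can be viewed through either the legacy or
the flex descriptor format, and that driver-specific state such as the
VSI back-pointer sits in an anonymous union.

#include <stdint.h>

/* Opaque driver types; only pointers to them are stored in this sketch. */
struct rte_mbuf;
struct i40e_vsi;
struct ice_vsi;
struct iavf_vsi;

union demo_rx_desc {                  /* "legacy" descriptor format */
	struct { uint64_t pkt_addr, hdr_addr; } read;
	struct { uint64_t qword0, qword1; } wb;
};

union demo_rx_flex_desc {             /* "flex" descriptor format */
	struct { uint64_t pkt_addr, hdr_addr; } read;
	struct { uint16_t status_error0, l2tag1, flex_meta0, flex_meta1; } wb;
};

struct demo_rx_entry {                /* one SW ring slot wraps an mbuf */
	struct rte_mbuf *mbuf;
};

struct demo_rx_queue {
	union {                       /* same ring, two descriptor views */
		volatile union demo_rx_desc *rx_ring;
		volatile union demo_rx_flex_desc *rx_flex_ring;
	};
	struct demo_rx_entry *sw_ring;  /* SW ring of demo_rx_entry slots */
	uint16_t nb_rx_desc;            /* ring length */
	uint16_t rx_tail;               /* current value of tail */
	union {                       /* per-driver back-pointer; one active */
		struct i40e_vsi *i40e_vsi;
		struct ice_vsi *ice_vsi;
		struct iavf_vsi *iavf_vsi;
	};
};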

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Use the common descriptor format
    
    v2:
    - Fix compile issues for Arm

 drivers/net/intel/common/rx.h                 |  10 +
 drivers/net/intel/iavf/iavf.h                 |   4 +-
 drivers/net/intel/iavf/iavf_ethdev.c          |  11 +-
 drivers/net/intel/iavf/iavf_rxtx.c            | 228 +++++++++---------
 drivers/net/intel/iavf/iavf_rxtx.h            | 156 +-----------
 drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   |  26 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c |  23 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_common.h |  27 +--
 drivers/net/intel/iavf/iavf_rxtx_vec_neon.c   |  30 +--
 drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    |  48 ++--
 drivers/net/intel/iavf/iavf_vchnl.c           |   6 +-
 11 files changed, 219 insertions(+), 350 deletions(-)
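
One pattern that accounts for a large share of the iavf_rxtx.c churn below
is the software ring element type: the iavf-private queue kept a bare array
of mbuf pointers, while the common queue wraps each slot in a ci_rx_entry,
so every access gains a ".mbuf" member reference. A hedged sketch of that
access change follows, with the ci_rx_entry definition abridged to the
single field used here and a hypothetical helper name:

#include <stdint.h>

struct rte_mbuf;                                /* opaque for this sketch */
struct ci_rx_entry { struct rte_mbuf *mbuf; };  /* abridged definition */

/* before: struct rte_mbuf **sw_ring;    mb = sw_ring[i];      */
/* after:  struct ci_rx_entry *sw_ring;  mb = sw_ring[i].mbuf; */
static inline struct rte_mbuf *
demo_sw_ring_mbuf(const struct ci_rx_entry *sw_ring, uint16_t i)
{
	/* the wrapper struct holds only the pointer, so layout is unchanged */
	return sw_ring[i].mbuf;
}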

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index 81b789e828..8d5466eb44 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -79,6 +79,7 @@ struct ci_rx_queue {
 	union { /* the VSI this queue belongs to */
 		struct i40e_vsi *i40e_vsi;
 		struct ice_vsi *ice_vsi;
+		struct iavf_vsi *iavf_vsi;
 	};
 	const struct rte_memzone *mz;
 	union {
@@ -108,6 +109,15 @@ struct ci_rx_queue {
 			int ts_offset; /* dynamic mbuf timestamp field offset */
 			uint64_t ts_flag; /* dynamic mbuf timestamp flag */
 		};
+		struct { /* iavf specific values */
+			const struct iavf_rxq_ops *ops; /**< queue ops */
+			struct iavf_rx_queue_stats *stats; /**< per-queue stats */
+			uint64_t phc_time; /**< HW timestamp */
+			uint8_t rel_mbufs_type; /**< type of release mbuf function */
+			uint8_t rx_flags; /**< Rx VLAN tag location flags */
+#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG1     BIT(0)
+#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2   BIT(1)
+		};
 	};
 };
 
diff --git a/drivers/net/intel/iavf/iavf.h b/drivers/net/intel/iavf/iavf.h
index 97e6b243fb..f81c939c96 100644
--- a/drivers/net/intel/iavf/iavf.h
+++ b/drivers/net/intel/iavf/iavf.h
@@ -97,7 +97,7 @@
 #define IAVF_L2TPV2_FLAGS_LEN	0x4000
 
 struct iavf_adapter;
-struct iavf_rx_queue;
+struct ci_rx_queue;
 struct ci_tx_queue;
 
 
@@ -555,7 +555,7 @@ int iavf_ipsec_crypto_request(struct iavf_adapter *adapter,
 		uint8_t *resp_msg, size_t resp_msg_len);
 extern const struct rte_tm_ops iavf_tm_ops;
 int iavf_get_ptp_cap(struct iavf_adapter *adapter);
-int iavf_get_phc_time(struct iavf_rx_queue *rxq);
+int iavf_get_phc_time(struct ci_rx_queue *rxq);
 int iavf_flow_sub(struct iavf_adapter *adapter,
 		  struct iavf_fsub_conf *filter);
 int iavf_flow_unsub(struct iavf_adapter *adapter,
diff --git a/drivers/net/intel/iavf/iavf_ethdev.c b/drivers/net/intel/iavf/iavf_ethdev.c
index 5babd587b3..02649c19b2 100644
--- a/drivers/net/intel/iavf/iavf_ethdev.c
+++ b/drivers/net/intel/iavf/iavf_ethdev.c
@@ -728,7 +728,7 @@ iavf_dev_configure(struct rte_eth_dev *dev)
 }
 
 static int
-iavf_init_rxq(struct rte_eth_dev *dev, struct iavf_rx_queue *rxq)
+iavf_init_rxq(struct rte_eth_dev *dev, struct ci_rx_queue *rxq)
 {
 	struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct rte_eth_dev_data *dev_data = dev->data;
@@ -779,8 +779,7 @@ iavf_init_rxq(struct rte_eth_dev *dev, struct iavf_rx_queue *rxq)
 static int
 iavf_init_queues(struct rte_eth_dev *dev)
 {
-	struct iavf_rx_queue **rxq =
-		(struct iavf_rx_queue **)dev->data->rx_queues;
+	struct ci_rx_queue **rxq = (struct ci_rx_queue **)dev->data->rx_queues;
 	int i, ret = IAVF_SUCCESS;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -955,7 +954,7 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev,
 static int
 iavf_start_queues(struct rte_eth_dev *dev)
 {
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int i;
 	uint16_t nb_txq, nb_rxq;
@@ -1867,9 +1866,9 @@ iavf_dev_update_ipsec_xstats(struct rte_eth_dev *ethdev,
 {
 	uint16_t idx;
 	for (idx = 0; idx < ethdev->data->nb_rx_queues; idx++) {
-		struct iavf_rx_queue *rxq;
+		struct ci_rx_queue *rxq;
 		struct iavf_ipsec_crypto_stats *stats;
-		rxq = (struct iavf_rx_queue *)ethdev->data->rx_queues[idx];
+		rxq = (struct ci_rx_queue *)ethdev->data->rx_queues[idx];
 		stats = &rxq->stats->ipsec_crypto;
 		ips->icount += stats->icount;
 		ips->ibytes += stats->ibytes;
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index 2aed22800e..44b0fc69c6 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -128,8 +128,8 @@ iavf_monitor_callback(const uint64_t value,
 int
 iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
-	volatile union iavf_rx_desc *rxdp;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ci_rx_desc *rxdp;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
@@ -222,7 +222,7 @@ check_tx_vec_allow(struct ci_tx_queue *txq)
 }
 
 static inline bool
-check_rx_bulk_allow(struct iavf_rx_queue *rxq)
+check_rx_bulk_allow(struct ci_rx_queue *rxq)
 {
 	int ret = true;
 
@@ -243,7 +243,7 @@ check_rx_bulk_allow(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-reset_rx_queue(struct iavf_rx_queue *rxq)
+reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	uint16_t len;
 	uint32_t i;
@@ -253,13 +253,13 @@ reset_rx_queue(struct iavf_rx_queue *rxq)
 
 	len = rxq->nb_rx_desc + IAVF_RX_MAX_BURST;
 
-	for (i = 0; i < len * sizeof(union iavf_rx_desc); i++)
+	for (i = 0; i < len * sizeof(union ci_rx_desc); i++)
 		((volatile char *)rxq->rx_ring)[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 
 	for (i = 0; i < IAVF_RX_MAX_BURST; i++)
-		rxq->sw_ring[rxq->nb_rx_desc + i] = &rxq->fake_mbuf;
+		rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;
 
 	/* for rx bulk */
 	rxq->rx_nb_avail = 0;
@@ -315,9 +315,9 @@ reset_tx_queue(struct ci_tx_queue *txq)
 }
 
 static int
-alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
+alloc_rxq_mbufs(struct ci_rx_queue *rxq)
 {
-	volatile union iavf_rx_desc *rxd;
+	volatile union ci_rx_desc *rxd;
 	struct rte_mbuf *mbuf = NULL;
 	uint64_t dma_addr;
 	uint16_t i, j;
@@ -326,8 +326,8 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		mbuf = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!mbuf)) {
 			for (j = 0; j < i; j++) {
-				rte_pktmbuf_free_seg(rxq->sw_ring[j]);
-				rxq->sw_ring[j] = NULL;
+				rte_pktmbuf_free_seg(rxq->sw_ring[j].mbuf);
+				rxq->sw_ring[j].mbuf = NULL;
 			}
 			PMD_DRV_LOG(ERR, "Failed to allocate mbuf for RX");
 			return -ENOMEM;
@@ -350,14 +350,14 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		rxd->read.rsvd2 = 0;
 #endif
 
-		rxq->sw_ring[i] = mbuf;
+		rxq->sw_ring[i].mbuf = mbuf;
 	}
 
 	return 0;
 }
 
 static inline void
-release_rxq_mbufs(struct iavf_rx_queue *rxq)
+release_rxq_mbufs(struct ci_rx_queue *rxq)
 {
 	uint16_t i;
 
@@ -365,9 +365,9 @@ release_rxq_mbufs(struct iavf_rx_queue *rxq)
 		return;
 
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
-		if (rxq->sw_ring[i]) {
-			rte_pktmbuf_free_seg(rxq->sw_ring[i]);
-			rxq->sw_ring[i] = NULL;
+		if (rxq->sw_ring[i].mbuf) {
+			rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
+			rxq->sw_ring[i].mbuf = NULL;
 		}
 	}
 
@@ -395,9 +395,9 @@ struct iavf_rxq_ops iavf_rxq_release_mbufs_ops[] = {
 };
 
 static inline void
-iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct iavf_rx_queue *rxq,
+iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ci_rx_queue *rxq,
 				    struct rte_mbuf *mb,
-				    volatile union iavf_rx_flex_desc *rxdp)
+				    volatile union ci_rx_flex_desc *rxdp)
 {
 	volatile struct iavf_32b_rx_flex_desc_comms_ovs *desc =
 			(volatile struct iavf_32b_rx_flex_desc_comms_ovs *)rxdp;
@@ -420,9 +420,9 @@ iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct iavf_rx_queue *rxq,
 }
 
 static inline void
-iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct iavf_rx_queue *rxq,
+iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct ci_rx_queue *rxq,
 				       struct rte_mbuf *mb,
-				       volatile union iavf_rx_flex_desc *rxdp)
+				       volatile union ci_rx_flex_desc *rxdp)
 {
 	volatile struct iavf_32b_rx_flex_desc_comms *desc =
 			(volatile struct iavf_32b_rx_flex_desc_comms *)rxdp;
@@ -462,9 +462,9 @@ iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct iavf_rx_queue *rxq,
 }
 
 static inline void
-iavf_rxd_to_pkt_fields_by_comms_aux_v2(struct iavf_rx_queue *rxq,
+iavf_rxd_to_pkt_fields_by_comms_aux_v2(struct ci_rx_queue *rxq,
 				       struct rte_mbuf *mb,
-				       volatile union iavf_rx_flex_desc *rxdp)
+				       volatile union ci_rx_flex_desc *rxdp)
 {
 	volatile struct iavf_32b_rx_flex_desc_comms *desc =
 			(volatile struct iavf_32b_rx_flex_desc_comms *)rxdp;
@@ -517,7 +517,7 @@ iavf_rxd_to_pkt_fields_t rxd_to_pkt_fields_ops[IAVF_RXDID_LAST + 1] = {
 };
 
 static void
-iavf_select_rxd_to_pkt_fields_handler(struct iavf_rx_queue *rxq, uint32_t rxdid)
+iavf_select_rxd_to_pkt_fields_handler(struct ci_rx_queue *rxq, uint32_t rxdid)
 {
 	rxq->rxdid = rxdid;
 
@@ -572,7 +572,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	struct iavf_info *vf =
 		IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 	struct iavf_vsi *vsi = &vf->vsi;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *mz;
 	uint32_t ring_size;
 	uint8_t proto_xtr;
@@ -610,7 +610,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 
 	/* Allocate the rx queue data structure */
 	rxq = rte_zmalloc_socket("iavf rxq",
-				 sizeof(struct iavf_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 socket_id);
 	if (!rxq) {
@@ -668,7 +668,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	rxq->port_id = dev->data->port_id;
 	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 	rxq->rx_hdr_len = 0;
-	rxq->vsi = vsi;
+	rxq->iavf_vsi = vsi;
 	rxq->offloads = offloads;
 
 	if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
@@ -698,7 +698,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	 * a little more to support bulk allocate.
 	 */
 	len = IAVF_MAX_RING_DESC + IAVF_RX_MAX_BURST;
-	ring_size = RTE_ALIGN(len * sizeof(union iavf_rx_desc),
+	ring_size = RTE_ALIGN(len * sizeof(union ci_rx_desc),
 			      IAVF_DMA_MEM_ALIGN);
 	mz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
 				      ring_size, IAVF_RING_BASE_ALIGN,
@@ -713,7 +713,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	/* Zero all the descriptors in the ring. */
 	memset(mz->addr, 0, ring_size);
 	rxq->rx_ring_phys_addr = mz->iova;
-	rxq->rx_ring = (union iavf_rx_desc *)mz->addr;
+	rxq->rx_ring = (union ci_rx_desc *)mz->addr;
 
 	rxq->mz = mz;
 	reset_rx_queue(rxq);
@@ -905,7 +905,7 @@ iavf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 	struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err = 0;
 
 	PMD_DRV_FUNC_TRACE();
@@ -997,7 +997,7 @@ iavf_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	struct iavf_adapter *adapter =
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 
 	PMD_DRV_FUNC_TRACE();
@@ -1060,7 +1060,7 @@ iavf_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 void
 iavf_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 {
-	struct iavf_rx_queue *q = dev->data->rx_queues[qid];
+	struct ci_rx_queue *q = dev->data->rx_queues[qid];
 
 	if (!q)
 		return;
@@ -1089,7 +1089,7 @@ iavf_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 static void
 iavf_reset_queues(struct rte_eth_dev *dev)
 {
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int i;
 
@@ -1151,7 +1151,7 @@ iavf_stop_queues(struct rte_eth_dev *dev)
 	 (1 << IAVF_RX_FLEX_DESC_STATUS0_RXE_S))
 
 static inline void
-iavf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union iavf_rx_desc *rxdp)
+iavf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union ci_rx_desc *rxdp)
 {
 	if (rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
 		(1 << IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
@@ -1165,7 +1165,7 @@ iavf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union iavf_rx_desc *rxdp)
 
 static inline void
 iavf_flex_rxd_to_vlan_tci(struct rte_mbuf *mb,
-			  volatile union iavf_rx_flex_desc *rxdp)
+			  volatile union ci_rx_flex_desc *rxdp)
 {
 	if (rte_le_to_cpu_64(rxdp->wb.status_error0) &
 		(1 << IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S)) {
@@ -1197,7 +1197,7 @@ iavf_flex_rxd_to_vlan_tci(struct rte_mbuf *mb,
 
 static inline void
 iavf_flex_rxd_to_ipsec_crypto_said_get(struct rte_mbuf *mb,
-			  volatile union iavf_rx_flex_desc *rxdp)
+			  volatile union ci_rx_flex_desc *rxdp)
 {
 	volatile struct iavf_32b_rx_flex_desc_comms_ipsec *desc =
 		(volatile struct iavf_32b_rx_flex_desc_comms_ipsec *)rxdp;
@@ -1208,7 +1208,7 @@ iavf_flex_rxd_to_ipsec_crypto_said_get(struct rte_mbuf *mb,
 
 static inline void
 iavf_flex_rxd_to_ipsec_crypto_status(struct rte_mbuf *mb,
-			  volatile union iavf_rx_flex_desc *rxdp,
+			  volatile union ci_rx_flex_desc *rxdp,
 			  struct iavf_ipsec_crypto_stats *stats)
 {
 	uint16_t status1 = rte_le_to_cpu_64(rxdp->wb.status_error1);
@@ -1298,7 +1298,7 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 }
 
 static inline uint64_t
-iavf_rxd_build_fdir(volatile union iavf_rx_desc *rxdp, struct rte_mbuf *mb)
+iavf_rxd_build_fdir(volatile union ci_rx_desc *rxdp, struct rte_mbuf *mb)
 {
 	uint64_t flags = 0;
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
@@ -1375,7 +1375,7 @@ iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
  * from the hardware point of view.
  */
 static inline void
-iavf_update_rx_tail(struct iavf_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
+iavf_update_rx_tail(struct ci_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
 {
 	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
 
@@ -1395,11 +1395,11 @@ iavf_update_rx_tail(struct iavf_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
 uint16_t
 iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	volatile union iavf_rx_desc *rx_ring;
-	volatile union iavf_rx_desc *rxdp;
-	struct iavf_rx_queue *rxq;
-	union iavf_rx_desc rxd;
-	struct rte_mbuf *rxe;
+	volatile union ci_rx_desc *rx_ring;
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_queue *rxq;
+	union ci_rx_desc rxd;
+	struct ci_rx_entry rxe;
 	struct rte_eth_dev *dev;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
@@ -1417,7 +1417,7 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
 	rx_ring = rxq->rx_ring;
-	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -1442,13 +1442,13 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (unlikely(rx_id == rxq->nb_rx_desc))
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1456,9 +1456,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 		rxdp->read.hdr_addr = 0;
@@ -1504,11 +1504,11 @@ uint16_t
 iavf_recv_pkts_flex_rxd(void *rx_queue,
 			struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	volatile union iavf_rx_desc *rx_ring;
-	volatile union iavf_rx_flex_desc *rxdp;
-	struct iavf_rx_queue *rxq;
-	union iavf_rx_flex_desc rxd;
-	struct rte_mbuf *rxe;
+	volatile union ci_rx_flex_desc *rx_ring;
+	volatile union ci_rx_flex_desc *rxdp;
+	struct ci_rx_queue *rxq;
+	union ci_rx_flex_desc rxd;
+	struct ci_rx_entry rxe;
 	struct rte_eth_dev *dev;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
@@ -1525,8 +1525,8 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
-	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	rx_ring = rxq->rx_flex_ring;
+	ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -1539,7 +1539,7 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 	}
 
 	while (nb_rx < nb_pkts) {
-		rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+		rxdp = &rx_ring[rx_id];
 		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
 
 		/* Check the DD bit first */
@@ -1559,13 +1559,13 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (unlikely(rx_id == rxq->nb_rx_desc))
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1573,9 +1573,9 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 		rxdp->read.hdr_addr = 0;
@@ -1629,9 +1629,9 @@ uint16_t
 iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 				  uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
-	union iavf_rx_flex_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_queue *rxq = rx_queue;
+	union ci_rx_flex_desc rxd;
+	struct ci_rx_entry rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb, *rxm;
@@ -1643,9 +1643,9 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint64_t pkt_flags;
 	uint64_t ts_ns;
 
-	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
-	volatile union iavf_rx_flex_desc *rxdp;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	volatile union ci_rx_flex_desc *rx_ring = rxq->rx_flex_ring;
+	volatile union ci_rx_flex_desc *rxdp;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -1658,7 +1658,7 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 	}
 
 	while (nb_rx < nb_pkts) {
-		rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+		rxdp = &rx_ring[rx_id];
 		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
 
 		/* Check the DD bit */
@@ -1678,13 +1678,13 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (rx_id == rxq->nb_rx_desc)
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1692,10 +1692,10 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
 
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 
@@ -1806,9 +1806,9 @@ uint16_t
 iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
-	union iavf_rx_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_queue *rxq = rx_queue;
+	union ci_rx_desc rxd;
+	struct ci_rx_entry rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb, *rxm;
@@ -1820,9 +1820,9 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
 
-	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
-	volatile union iavf_rx_desc *rxdp;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	volatile union ci_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union ci_rx_desc *rxdp;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -1847,13 +1847,13 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (rx_id == rxq->nb_rx_desc)
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1861,10 +1861,10 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
 
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 
@@ -1963,12 +1963,12 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 #define IAVF_LOOK_AHEAD 8
 static inline int
-iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
+iavf_rx_scan_hw_ring_flex_rxd(struct ci_rx_queue *rxq,
 			    struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts)
 {
-	volatile union iavf_rx_flex_desc *rxdp;
-	struct rte_mbuf **rxep;
+	volatile union ci_rx_flex_desc *rxdp;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t stat_err0;
 	uint16_t pkt_len;
@@ -1976,10 +1976,10 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 	int32_t i, j, nb_rx = 0;
 	int32_t nb_staged = 0;
 	uint64_t pkt_flags;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 	uint64_t ts_ns;
 
-	rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail];
+	rxdp = &rxq->rx_flex_ring[rxq->rx_tail];
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -2038,7 +2038,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 					  rxq->rx_tail +
 					  i * IAVF_LOOK_AHEAD + j);
 
-			mb = rxep[j];
+			mb = rxep[j].mbuf;
 			pkt_len = (rte_le_to_cpu_16(rxdp[j].wb.pkt_len) &
 				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
 			mb->data_len = pkt_len;
@@ -2072,11 +2072,11 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 
 			/* Put up to nb_pkts directly into buffers */
 			if ((i + j) < nb_pkts) {
-				rx_pkts[i + j] = rxep[j];
+				rx_pkts[i + j] = rxep[j].mbuf;
 				nb_rx++;
 			} else {
 				/* Stage excess pkts received */
-				rxq->rx_stage[nb_staged] = rxep[j];
+				rxq->rx_stage[nb_staged] = rxep[j].mbuf;
 				nb_staged++;
 			}
 		}
@@ -2090,16 +2090,16 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 
 	/* Clear software ring entries */
 	for (i = 0; i < (nb_rx + nb_staged); i++)
-		rxq->sw_ring[rxq->rx_tail + i] = NULL;
+		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
 
 	return nb_rx;
 }
 
 static inline int
-iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+iavf_rx_scan_hw_ring(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxep;
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t pkt_len;
 	uint64_t qword1;
@@ -2108,7 +2108,7 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 	int32_t i, j, nb_rx = 0;
 	int32_t nb_staged = 0;
 	uint64_t pkt_flags;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	rxdp = &rxq->rx_ring[rxq->rx_tail];
 	rxep = &rxq->sw_ring[rxq->rx_tail];
@@ -2164,7 +2164,7 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 			IAVF_DUMP_RX_DESC(rxq, &rxdp[j],
 					 rxq->rx_tail + i * IAVF_LOOK_AHEAD + j);
 
-			mb = rxep[j];
+			mb = rxep[j].mbuf;
 			qword1 = rte_le_to_cpu_64
 					(rxdp[j].wb.qword1.status_error_len);
 			pkt_len = ((qword1 & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >>
@@ -2190,10 +2190,10 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 
 			/* Put up to nb_pkts directly into buffers */
 			if ((i + j) < nb_pkts) {
-				rx_pkts[i + j] = rxep[j];
+				rx_pkts[i + j] = rxep[j].mbuf;
 				nb_rx++;
 			} else { /* Stage excess pkts received */
-				rxq->rx_stage[nb_staged] = rxep[j];
+				rxq->rx_stage[nb_staged] = rxep[j].mbuf;
 				nb_staged++;
 			}
 		}
@@ -2207,13 +2207,13 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 
 	/* Clear software ring entries */
 	for (i = 0; i < (nb_rx + nb_staged); i++)
-		rxq->sw_ring[rxq->rx_tail + i] = NULL;
+		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
 
 	return nb_rx;
 }
 
 static inline uint16_t
-iavf_rx_fill_from_stage(struct iavf_rx_queue *rxq,
+iavf_rx_fill_from_stage(struct ci_rx_queue *rxq,
 		       struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts)
 {
@@ -2232,10 +2232,10 @@ iavf_rx_fill_from_stage(struct iavf_rx_queue *rxq,
 }
 
 static inline int
-iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
+iavf_rx_alloc_bufs(struct ci_rx_queue *rxq)
 {
-	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxep;
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx, i;
 	uint64_t dma_addr;
@@ -2256,9 +2256,9 @@ iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
 	for (i = 0; i < rxq->rx_free_thresh; i++) {
 		if (likely(i < (rxq->rx_free_thresh - 1)))
 			/* Prefetch next mbuf */
-			rte_prefetch0(rxep[i + 1]);
+			rte_prefetch0(rxep[i + 1].mbuf);
 
-		mb = rxep[i];
+		mb = rxep[i].mbuf;
 		rte_mbuf_refcnt_set(mb, 1);
 		mb->next = NULL;
 		mb->data_off = RTE_PKTMBUF_HEADROOM;
@@ -2284,7 +2284,7 @@ iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
 static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = (struct iavf_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	uint16_t nb_rx = 0;
 
 	if (!nb_pkts)
@@ -2312,11 +2312,11 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 			rxq->rx_tail = (uint16_t)(rxq->rx_tail - (nb_rx + nb_staged));
 			for (i = 0, j = rxq->rx_tail; i < nb_rx; i++, j++) {
-				rxq->sw_ring[j] = rx_pkts[i];
+				rxq->sw_ring[j].mbuf = rx_pkts[i];
 				rx_pkts[i] = NULL;
 			}
 			for (i = 0, j = rxq->rx_tail + nb_rx; i < nb_staged; i++, j++) {
-				rxq->sw_ring[j] = rxq->rx_stage[i];
+				rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
 				rx_pkts[i] = NULL;
 			}
 
@@ -3843,13 +3843,13 @@ static uint16_t
 iavf_recv_pkts_no_poll(void *rx_queue, struct rte_mbuf **rx_pkts,
 				uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	enum iavf_rx_burst_type rx_burst_type;
 
-	if (!rxq->vsi || rxq->vsi->adapter->no_poll)
+	if (!rxq->iavf_vsi || rxq->iavf_vsi->adapter->no_poll)
 		return 0;
 
-	rx_burst_type = rxq->vsi->adapter->rx_burst_type;
+	rx_burst_type = rxq->iavf_vsi->adapter->rx_burst_type;
 
 	return iavf_rx_pkt_burst_ops[rx_burst_type].pkt_burst(rx_queue,
 								rx_pkts, nb_pkts);
@@ -3965,7 +3965,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	enum iavf_rx_burst_type rx_burst_type;
 	int no_poll_on_link_down = adapter->devargs.no_poll_on_link_down;
 	int i;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	bool use_flex = true;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -4379,7 +4379,7 @@ void
 iavf_dev_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 		     struct rte_eth_rxq_info *qinfo)
 {
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -4413,8 +4413,8 @@ uint32_t
 iavf_dev_rxq_count(void *rx_queue)
 {
 #define IAVF_RXQ_SCAN_INTERVAL 4
-	volatile union iavf_rx_desc *rxdp;
-	struct iavf_rx_queue *rxq;
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_queue *rxq;
 	uint16_t desc = 0;
 
 	rxq = rx_queue;
@@ -4441,7 +4441,7 @@ iavf_dev_rxq_count(void *rx_queue)
 int
 iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile uint64_t *status;
 	uint64_t mask;
 	uint32_t desc;
diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index 8c0bb5475d..98abebae90 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -17,7 +17,7 @@
 #define IAVF_RING_BASE_ALIGN      128
 
 /* used for Rx Bulk Allocate */
-#define IAVF_RX_MAX_BURST         32
+#define IAVF_RX_MAX_BURST         CI_RX_MAX_BURST
 
 /* Max data buffer size must be 16K - 128 bytes */
 #define IAVF_RX_MAX_DATA_BUF_SIZE (16 * 1024 - 128)
@@ -123,93 +123,12 @@ extern uint64_t iavf_timestamp_dynflag;
 extern int iavf_timestamp_dynfield_offset;
 extern int rte_pmd_iavf_tx_lldp_dynfield_offset;
 
-/**
- * Rx Flex Descriptors
- * These descriptors are used instead of the legacy version descriptors
- */
-union iavf_16b_rx_flex_desc {
-	struct {
-		__le64 pkt_addr; /* Packet buffer address */
-		__le64 hdr_addr; /* Header buffer address */
-				 /* bit 0 of hdr_addr is DD bit */
-	} read;
-	struct {
-		/* Qword 0 */
-		u8 rxdid; /* descriptor builder profile ID */
-		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
-		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
-		__le16 pkt_len; /* [15:14] are reserved */
-		__le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
-						/* sph=[11:11] */
-						/* ff1/ext=[15:12] */
-
-		/* Qword 1 */
-		__le16 status_error0;
-		__le16 l2tag1;
-		__le16 flex_meta0;
-		__le16 flex_meta1;
-	} wb; /* writeback */
-};
-
-union iavf_32b_rx_flex_desc {
-	struct {
-		__le64 pkt_addr; /* Packet buffer address */
-		__le64 hdr_addr; /* Header buffer address */
-				 /* bit 0 of hdr_addr is DD bit */
-		__le64 rsvd1;
-		__le64 rsvd2;
-	} read;
-	struct {
-		/* Qword 0 */
-		u8 rxdid; /* descriptor builder profile ID */
-		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
-		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
-		__le16 pkt_len; /* [15:14] are reserved */
-		__le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
-						/* sph=[11:11] */
-						/* ff1/ext=[15:12] */
-
-		/* Qword 1 */
-		__le16 status_error0;
-		__le16 l2tag1;
-		__le16 flex_meta0;
-		__le16 flex_meta1;
-
-		/* Qword 2 */
-		__le16 status_error1;
-		u8 flex_flags2;
-		u8 time_stamp_low;
-		__le16 l2tag2_1st;
-		__le16 l2tag2_2nd;
-
-		/* Qword 3 */
-		__le16 flex_meta2;
-		__le16 flex_meta3;
-		union {
-			struct {
-				__le16 flex_meta4;
-				__le16 flex_meta5;
-			} flex;
-			__le32 ts_high;
-		} flex_ts;
-	} wb; /* writeback */
-};
-
-/* HW desc structure, both 16-byte and 32-byte types are supported */
-#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
-#define iavf_rx_desc iavf_16byte_rx_desc
-#define iavf_rx_flex_desc iavf_16b_rx_flex_desc
-#else
-#define iavf_rx_desc iavf_32byte_rx_desc
-#define iavf_rx_flex_desc iavf_32b_rx_flex_desc
-#endif
-
-typedef void (*iavf_rxd_to_pkt_fields_t)(struct iavf_rx_queue *rxq,
+typedef void (*iavf_rxd_to_pkt_fields_t)(struct ci_rx_queue *rxq,
 				struct rte_mbuf *mb,
-				volatile union iavf_rx_flex_desc *rxdp);
+				volatile union ci_rx_flex_desc *rxdp);
 
 struct iavf_rxq_ops {
-	void (*release_mbufs)(struct iavf_rx_queue *rxq);
+	void (*release_mbufs)(struct ci_rx_queue *rxq);
 };
 
 struct iavf_txq_ops {
@@ -222,59 +141,6 @@ struct iavf_rx_queue_stats {
 	struct iavf_ipsec_crypto_stats ipsec_crypto;
 };
 
-/* Structure associated with each Rx queue. */
-struct iavf_rx_queue {
-	struct rte_mempool *mp;       /* mbuf pool to populate Rx ring */
-	const struct rte_memzone *mz; /* memzone for Rx ring */
-	volatile union iavf_rx_desc *rx_ring; /* Rx ring virtual address */
-	uint64_t rx_ring_phys_addr;   /* Rx ring DMA address */
-	struct rte_mbuf **sw_ring;     /* address of SW ring */
-	uint16_t nb_rx_desc;          /* ring length */
-	uint16_t rx_tail;             /* current value of tail */
-	volatile uint8_t *qrx_tail;   /* register address of tail */
-	uint16_t rx_free_thresh;      /* max free RX desc to hold */
-	uint16_t nb_rx_hold;          /* number of held free RX desc */
-	struct rte_mbuf *pkt_first_seg; /* first segment of current packet */
-	struct rte_mbuf *pkt_last_seg;  /* last segment of current packet */
-	struct rte_mbuf fake_mbuf;      /* dummy mbuf */
-	uint8_t rxdid;
-	uint8_t rel_mbufs_type;
-
-	/* used for VPMD */
-	uint16_t rxrearm_nb;       /* number of remaining to be re-armed */
-	uint16_t rxrearm_start;    /* the idx we start the re-arming from */
-	uint64_t mbuf_initializer; /* value to init mbufs */
-
-	/* for rx bulk */
-	uint16_t rx_nb_avail;      /* number of staged packets ready */
-	uint16_t rx_next_avail;    /* index of next staged packets */
-	uint16_t rx_free_trigger;  /* triggers rx buffer allocation */
-	struct rte_mbuf *rx_stage[IAVF_RX_MAX_BURST * 2]; /* store mbuf */
-
-	uint16_t port_id;        /* device port ID */
-	uint8_t crc_len;        /* 0 if CRC stripped, 4 otherwise */
-	uint8_t fdir_enabled;   /* 0 if FDIR disabled, 1 when enabled */
-	uint16_t queue_id;      /* Rx queue index */
-	uint16_t rx_buf_len;    /* The packet buffer size */
-	uint16_t rx_hdr_len;    /* The header buffer size */
-	uint16_t max_pkt_len;   /* Maximum packet length */
-	struct iavf_vsi *vsi; /**< the VSI this queue belongs to */
-
-	bool q_set;             /* if rx queue has been configured */
-	bool rx_deferred_start; /* don't start this queue in dev start */
-	const struct iavf_rxq_ops *ops;
-	uint8_t rx_flags;
-#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG1     BIT(0)
-#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2   BIT(1)
-	uint8_t proto_xtr; /* protocol extraction type */
-	uint64_t xtr_ol_flag;
-		/* flexible descriptor metadata extraction offload flag */
-	struct iavf_rx_queue_stats *stats;
-	uint64_t offloads;
-	uint64_t phc_time;
-	uint64_t hw_time_update;
-};
-
 /* Offload features */
 union iavf_tx_offload {
 	uint64_t data;
@@ -692,7 +558,7 @@ uint16_t iavf_xmit_pkts_vec_avx2_offload(void *tx_queue, struct rte_mbuf **tx_pk
 int iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc);
 int iavf_rx_vec_dev_check(struct rte_eth_dev *dev);
 int iavf_tx_vec_dev_check(struct rte_eth_dev *dev);
-int iavf_rxq_vec_setup(struct iavf_rx_queue *rxq);
+int iavf_rxq_vec_setup(struct ci_rx_queue *rxq);
 int iavf_txq_vec_setup(struct ci_tx_queue *txq);
 uint16_t iavf_recv_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				   uint16_t nb_pkts);
@@ -732,23 +598,23 @@ uint8_t iavf_proto_xtr_type_to_rxdid(uint8_t xtr_type);
 
 void iavf_set_default_ptype_table(struct rte_eth_dev *dev);
 void iavf_tx_queue_release_mbufs_avx512(struct ci_tx_queue *txq);
-void iavf_rx_queue_release_mbufs_sse(struct iavf_rx_queue *rxq);
+void iavf_rx_queue_release_mbufs_sse(struct ci_rx_queue *rxq);
 void iavf_tx_queue_release_mbufs_sse(struct ci_tx_queue *txq);
-void iavf_rx_queue_release_mbufs_neon(struct iavf_rx_queue *rxq);
+void iavf_rx_queue_release_mbufs_neon(struct ci_rx_queue *rxq);
 
 static inline
-void iavf_dump_rx_descriptor(struct iavf_rx_queue *rxq,
+void iavf_dump_rx_descriptor(struct ci_rx_queue *rxq,
 			    const volatile void *desc,
 			    uint16_t rx_id)
 {
 #ifdef RTE_NET_INTEL_USE_16BYTE_DESC
-	const volatile union iavf_16byte_rx_desc *rx_desc = desc;
+	const volatile union ci_rx_desc *rx_desc = desc;
 
 	printf("Queue %d Rx_desc %d: QW0: 0x%016"PRIx64" QW1: 0x%016"PRIx64"\n",
 	       rxq->queue_id, rx_id, rx_desc->read.pkt_addr,
 	       rx_desc->read.hdr_addr);
 #else
-	const volatile union iavf_32byte_rx_desc *rx_desc = desc;
+	const volatile union ci_rx_desc *rx_desc = desc;
 
 	printf("Queue %d Rx_desc %d: QW0: 0x%016"PRIx64" QW1: 0x%016"PRIx64
 	       " QW2: 0x%016"PRIx64" QW3: 0x%016"PRIx64"\n", rxq->queue_id,
@@ -795,7 +661,7 @@ void iavf_dump_tx_descriptor(const struct ci_tx_queue *txq,
 #define FDIR_PROC_ENABLE_PER_QUEUE(ad, on) do { \
 	int i; \
 	for (i = 0; i < (ad)->dev_data->nb_rx_queues; i++) { \
-		struct iavf_rx_queue *rxq = (ad)->dev_data->rx_queues[i]; \
+		struct ci_rx_queue *rxq = (ad)->dev_data->rx_queues[i]; \
 		if (!rxq) \
 			continue; \
 		rxq->fdir_enabled = on; \
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
index 40b265183f..319f0166ce 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
@@ -7,7 +7,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	iavf_rxq_rearm_common(rxq, false);
 }
@@ -15,19 +15,16 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 #define PKTLEN_SHIFT     10
 
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx2(struct ci_rx_queue *rxq,
 			     struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts, uint8_t *split_packet,
 			     bool offload)
 {
-	/* const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; */
-	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
-
+	const uint32_t *type_table = rxq->iavf_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	/* struct iavf_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail]; */
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union iavf_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ci_rx_desc *rxdp = &rxq->rx_ring[rxq->rx_tail];
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 
 	rte_prefetch0(rxdp);
@@ -485,12 +482,12 @@ flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct ci_rx_queue *rxq,
 				      struct rte_mbuf **rx_pkts,
 				      uint16_t nb_pkts, uint8_t *split_packet,
 				      bool offload)
 {
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
@@ -499,9 +496,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union iavf_rx_flex_desc *rxdp =
-		(volatile union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ci_rx_flex_desc *rxdp = rxq->rx_flex_ring + rxq->rx_tail;
 
 	rte_prefetch0(rxdp);
 
@@ -1472,7 +1468,7 @@ static __rte_always_inline uint16_t
 iavf_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				   uint16_t nb_pkts, bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -1561,7 +1557,7 @@ iavf_recv_scattered_burst_vec_avx2_flex_rxd(void *rx_queue,
 					    struct rte_mbuf **rx_pkts,
 					    uint16_t nb_pkts, bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
index 53bc69ecf6..d2aeccf5e6 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
@@ -27,26 +27,26 @@
 #define IAVF_RX_TS_OFFLOAD
 
 static __rte_always_inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	iavf_rxq_rearm_common(rxq, true);
 }
 
 #define IAVF_RX_LEN_MASK 0x80808080
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx512(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx512(struct ci_rx_queue *rxq,
 			       struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts, uint8_t *split_packet,
 			       bool offload)
 {
 #ifdef IAVF_RX_PTYPE_OFFLOAD
-	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *type_table = rxq->iavf_vsi->adapter->ptype_tbl;
 #endif
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0, 0,
 						    rxq->mbuf_initializer);
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union iavf_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ci_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
 
 	rte_prefetch0(rxdp);
 
@@ -577,13 +577,13 @@ flex_rxd_to_fdir_flags_vec_avx512(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct ci_rx_queue *rxq,
 					struct rte_mbuf **rx_pkts,
 					uint16_t nb_pkts,
 					uint8_t *split_packet,
 					bool offload)
 {
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
 #endif
@@ -593,9 +593,8 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0, 0,
 						    rxq->mbuf_initializer);
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union iavf_rx_flex_desc *rxdp =
-		(volatile union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ci_rx_flex_desc *rxdp = rxq->rx_flex_ring + rxq->rx_tail;
 
 	rte_prefetch0(rxdp);
 
@@ -1652,7 +1651,7 @@ static __rte_always_inline uint16_t
 iavf_recv_scattered_burst_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts, bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -1728,7 +1727,7 @@ iavf_recv_scattered_burst_vec_avx512_flex_rxd(void *rx_queue,
 					      uint16_t nb_pkts,
 					      bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
index c78bebe9b4..e98551e1fb 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
@@ -8,7 +8,6 @@
 #include <ethdev_driver.h>
 #include <rte_malloc.h>
 
-#include "../common/rx.h"
 #include "iavf.h"
 #include "iavf_rxtx.h"
 
@@ -21,7 +20,7 @@ iavf_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
 }
 
 static inline void
-_iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
+_iavf_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	const unsigned int mask = rxq->nb_rx_desc - 1;
 	unsigned int i;
@@ -32,15 +31,15 @@ _iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
 	/* free all mbufs that are valid in the ring */
 	if (rxq->rxrearm_nb == 0) {
 		for (i = 0; i < rxq->nb_rx_desc; i++) {
-			if (rxq->sw_ring[i])
-				rte_pktmbuf_free_seg(rxq->sw_ring[i]);
+			if (rxq->sw_ring[i].mbuf)
+				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
 		}
 	} else {
 		for (i = rxq->rx_tail;
 		     i != rxq->rxrearm_start;
 		     i = (i + 1) & mask) {
-			if (rxq->sw_ring[i])
-				rte_pktmbuf_free_seg(rxq->sw_ring[i]);
+			if (rxq->sw_ring[i].mbuf)
+				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
 		}
 	}
 
@@ -51,7 +50,7 @@ _iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
 }
 
 static inline int
-iavf_rx_vec_queue_default(struct iavf_rx_queue *rxq)
+iavf_rx_vec_queue_default(struct ci_rx_queue *rxq)
 {
 	if (!rxq)
 		return -1;
@@ -117,7 +116,7 @@ static inline int
 iavf_rx_vec_dev_check_default(struct rte_eth_dev *dev)
 {
 	int i;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int ret;
 	int result = 0;
 
@@ -240,12 +239,12 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 
 #ifdef RTE_ARCH_X86
 static __rte_always_inline void
-iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
+iavf_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
 {
 	int i;
 	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
@@ -259,7 +258,7 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 
 			dma_addr0 = _mm_setzero_si128();
 			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i] = &rxq->fake_mbuf;
+				rxp[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
@@ -278,8 +277,8 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 	for (i = 0; i < IAVF_VPMD_RXQ_REARM_THRESH; i += 2, rxp += 2) {
 		__m128i vaddr0, vaddr1;
 
-		mb0 = rxp[0];
-		mb1 = rxp[1];
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
 
 		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
 		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
index 86f3a7839d..562e574aab 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
@@ -15,12 +15,12 @@
 #include "iavf_rxtx_vec_common.h"
 
 static inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	uint64x2_t dma_addr0, dma_addr1;
 	uint64x2_t zero = vdupq_n_u64(0);
@@ -35,7 +35,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 		if (rxq->rxrearm_nb + IAVF_VPMD_RXQ_REARM_THRESH >=
 		    rxq->nb_rx_desc) {
 			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxep[i] = &rxq->fake_mbuf;
+				rxep[i].mbuf = &rxq->fake_mbuf;
 				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read), zero);
 			}
 		}
@@ -46,8 +46,8 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
 	for (i = 0; i < IAVF_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0];
-		mb1 = rxep[1];
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
 
 		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
 		dma_addr0 = vdupq_n_u64(paddr);
@@ -75,7 +75,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-desc_to_olflags_v(struct iavf_rx_queue *rxq, volatile union iavf_rx_desc *rxdp,
+desc_to_olflags_v(struct ci_rx_queue *rxq, volatile union ci_rx_desc *rxdp,
 		  uint64x2_t descs[4], struct rte_mbuf **rx_pkts)
 {
 	RTE_SET_USED(rxdp);
@@ -193,17 +193,17 @@ desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **__rte_restrict rx_pkts,
  * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct iavf_rx_queue *__rte_restrict rxq,
+_recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		   struct rte_mbuf **__rte_restrict rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	RTE_SET_USED(split_packet);
 
-	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **sw_ring;
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	/* mask to shuffle from desc. to mbuf */
 	uint8x16_t shuf_msk = {
@@ -283,8 +283,8 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *__rte_restrict rxq,
 		descs[0] = vld1q_lane_u64(RTE_CAST_PTR(uint64_t *, rxdp), descs[0], 0);
 
 		/* B.1 load 4 mbuf point */
-		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
-		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
+		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos].mbuf);
+		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2].mbuf);
 
 		/* B.2 copy 4 mbuf point into rx_pkts  */
 		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
@@ -394,13 +394,13 @@ iavf_recv_pkts_vec(void *__rte_restrict rx_queue,
 }
 
 void __rte_cold
-iavf_rx_queue_release_mbufs_neon(struct iavf_rx_queue *rxq)
+iavf_rx_queue_release_mbufs_neon(struct ci_rx_queue *rxq)
 {
 	_iavf_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-iavf_rxq_vec_setup(struct iavf_rx_queue *rxq)
+iavf_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->rel_mbufs_type = IAVF_REL_MBUFS_NEON_VEC;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
index 190c1dd869..8bbcf836b7 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
@@ -13,13 +13,13 @@
 #include <rte_vect.h>
 
 static inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 
-	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
@@ -33,7 +33,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 		if (rxq->rxrearm_nb + rxq->rx_free_thresh >= rxq->nb_rx_desc) {
 			dma_addr0 = _mm_setzero_si128();
 			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i] = &rxq->fake_mbuf;
+				rxp[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
@@ -47,8 +47,8 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 	for (i = 0; i < rxq->rx_free_thresh; i += 2, rxp += 2) {
 		__m128i vaddr0, vaddr1;
 
-		mb0 = rxp[0];
-		mb1 = rxp[1];
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
 
 		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
 		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
@@ -88,7 +88,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4],
 		  struct rte_mbuf **rx_pkts)
 {
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -206,11 +206,11 @@ flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 static inline void
-flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4], __m128i descs_bh[4],
+flex_desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4], __m128i descs_bh[4],
 		       struct rte_mbuf **rx_pkts)
 #else
 static inline void
-flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+flex_desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4],
 		       struct rte_mbuf **rx_pkts)
 #endif
 {
@@ -466,16 +466,16 @@ flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
-	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **sw_ring;
+	volatile union ci_rx_desc *rxdp;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
 	__m128i shuf_msk;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	__m128i crc_adjust = _mm_set_epi16(
 				0, 0, 0,    /* ignore non-length fields */
@@ -571,7 +571,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 #endif
 
 		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
-		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos].mbuf);
 		/* Read desc statuses backwards to avoid race condition */
 		/* A.1 load desc[3] */
 		descs[3] = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, rxdp + 3));
@@ -714,16 +714,16 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
+_recv_raw_pkts_vec_flex_rxd(struct ci_rx_queue *rxq,
 			    struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts, uint8_t *split_packet)
 {
-	volatile union iavf_rx_flex_desc *rxdp;
-	struct rte_mbuf **sw_ring;
+	volatile union ci_rx_flex_desc *rxdp;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
 #endif
@@ -779,7 +779,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = (volatile union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+	rxdp = rxq->rx_flex_ring + rxq->rx_tail;
 
 	rte_prefetch0(rxdp);
 
@@ -857,7 +857,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 #endif
 
 		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
-		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos].mbuf);
 		/* Read desc statuses backwards to avoid race condition */
 		/* A.1 load desc[3] */
 		descs[3] = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, rxdp + 3));
@@ -1207,7 +1207,7 @@ static uint16_t
 iavf_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
 	unsigned int i = 0;
 
@@ -1276,7 +1276,7 @@ iavf_recv_scattered_burst_vec_flex_rxd(void *rx_queue,
 				       struct rte_mbuf **rx_pkts,
 				       uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
 	unsigned int i = 0;
 
@@ -1449,7 +1449,7 @@ iavf_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-iavf_rx_queue_release_mbufs_sse(struct iavf_rx_queue *rxq)
+iavf_rx_queue_release_mbufs_sse(struct ci_rx_queue *rxq)
 {
 	_iavf_rx_queue_release_mbufs_vec(rxq);
 }
@@ -1462,7 +1462,7 @@ iavf_txq_vec_setup(struct ci_tx_queue *txq)
 }
 
 int __rte_cold
-iavf_rxq_vec_setup(struct iavf_rx_queue *rxq)
+iavf_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->rel_mbufs_type = IAVF_REL_MBUFS_SSE_VEC;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
diff --git a/drivers/net/intel/iavf/iavf_vchnl.c b/drivers/net/intel/iavf/iavf_vchnl.c
index da1ef5900f..6d3f1b8ec0 100644
--- a/drivers/net/intel/iavf/iavf_vchnl.c
+++ b/drivers/net/intel/iavf/iavf_vchnl.c
@@ -1218,7 +1218,7 @@ int
 iavf_configure_queues(struct iavf_adapter *adapter,
 		uint16_t num_queue_pairs, uint16_t index)
 {
-	struct iavf_rx_queue **rxq = (struct iavf_rx_queue **)adapter->dev_data->rx_queues;
+	struct ci_rx_queue **rxq = (struct ci_rx_queue **)adapter->dev_data->rx_queues;
 	struct ci_tx_queue **txq = (struct ci_tx_queue **)adapter->dev_data->tx_queues;
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
 	struct virtchnl_vsi_queue_config_info *vc_config;
@@ -2258,9 +2258,9 @@ iavf_get_ptp_cap(struct iavf_adapter *adapter)
 }
 
 int
-iavf_get_phc_time(struct iavf_rx_queue *rxq)
+iavf_get_phc_time(struct ci_rx_queue *rxq)
 {
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
 	struct virtchnl_phc_time phc_time;
 	struct iavf_cmd_info args;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 19/25] net/intel: generalize vectorized Rx rearm
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (17 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 18/25] net/iavf: " Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 20/25] net/i40e: use common Rx rearm code Anatoly Burakov
                     ` (5 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Bruce Richardson

There is a certain amount of duplication between various drivers when it
comes to Rx ring rearm. This patch takes the implementation from the ice
driver as a base, because it has support for no IOVA in mbuf as well as
all vector implementations, and moves it to a common file.

While we're at it, also make sure to use common definitions for things like
burst size, rearm threshold, and descriptors per loop, which are currently
defined separately in each driver.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Rename rx_vec_sse.h to rx_vec_x86.h
    - Use the common descriptor format instead of constant propagation
    - Use the new unified definitions for burst size, rearm threshold, and descriptors per loop
    - Whitespace and variable name cleanups for vector code
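
    As a reviewer reference, a minimal sketch of how a driver-side rearm
    wrapper is expected to call the new common helper. This mirrors the ice
    changes below; the ISA fallback (AVX512 -> AVX2 -> SSE for 16-byte
    descriptors, SSE-only for 32-byte descriptors) happens inside
    ci_rxq_rearm():

        #include "../common/rx_vec_x86.h"

        static __rte_always_inline void
        ice_rxq_rearm(struct ci_rx_queue *rxq)
        {
                /* request AVX2; common code falls back to SSE if AVX2 is unavailable */
                ci_rxq_rearm(rxq, CI_RX_VEC_LEVEL_AVX2);
        }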

 drivers/net/intel/common/rx.h               |   4 +
 drivers/net/intel/common/rx_vec_x86.h       | 303 ++++++++++++++++++++
 drivers/net/intel/ice/ice_rxtx.h            |  12 +-
 drivers/net/intel/ice/ice_rxtx_common_avx.h | 233 ---------------
 drivers/net/intel/ice/ice_rxtx_vec_avx2.c   |   5 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx512.c |   5 +-
 drivers/net/intel/ice/ice_rxtx_vec_sse.c    |  77 +----
 7 files changed, 322 insertions(+), 317 deletions(-)
 create mode 100644 drivers/net/intel/common/rx_vec_x86.h
 delete mode 100644 drivers/net/intel/ice/ice_rxtx_common_avx.h

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index 8d5466eb44..cf83994c47 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -15,6 +15,10 @@
 
 #define CI_RX_MAX_BURST 32
 #define CI_RX_MAX_NSEG 2
+#define CI_VPMD_RX_BURST            32
+#define CI_VPMD_DESCS_PER_LOOP      4
+#define CI_VPMD_DESCS_PER_LOOP_WIDE 8
+#define CI_VPMD_RX_REARM_THRESH     CI_VPMD_RX_BURST
 
 struct ci_rx_queue;
 
diff --git a/drivers/net/intel/common/rx_vec_x86.h b/drivers/net/intel/common/rx_vec_x86.h
new file mode 100644
index 0000000000..7c57016df7
--- /dev/null
+++ b/drivers/net/intel/common/rx_vec_x86.h
@@ -0,0 +1,303 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2025 Intel Corporation
+ */
+
+#ifndef _COMMON_INTEL_RX_VEC_X86_H_
+#define _COMMON_INTEL_RX_VEC_X86_H_
+
+#include <stdint.h>
+
+#include <ethdev_driver.h>
+#include <rte_io.h>
+
+#include "rx.h"
+
+enum ci_rx_vec_level {
+	CI_RX_VEC_LEVEL_SSE = 0,
+	CI_RX_VEC_LEVEL_AVX2,
+	CI_RX_VEC_LEVEL_AVX512,
+};
+
+static inline int
+_ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	volatile union ci_rx_desc *rxdp;
+	int i;
+
+	rxdp = &rxq->rx_ring[rxq->rxrearm_start];
+
+	if (rte_mempool_get_bulk(rxq->mp, (void **)rxp, rearm_thresh) < 0) {
+		if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) {
+			const __m128i zero = _mm_setzero_si128();
+
+			for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) {
+				rxp[i].mbuf = &rxq->fake_mbuf;
+				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i]), zero);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += rearm_thresh;
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * SSE code path can handle both 16-byte and 32-byte descriptors with one code
+ * path, as we only ever write 16 bytes at a time.
+ */
+static __rte_always_inline void
+_ci_rxq_rearm_sse(struct ci_rx_queue *rxq)
+{
+	const __m128i hdroom = _mm_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+	const __m128i zero = _mm_setzero_si128();
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile union ci_rx_desc *rxdp;
+	int i;
+
+	rxdp = &rxq->rx_ring[rxq->rxrearm_start];
+
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp += 2) {
+		struct rte_mbuf *mb0 = rxp[0].mbuf;
+		struct rte_mbuf *mb1 = rxp[1].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		__m128i addr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		__m128i addr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* add headroom to address values */
+		addr0 = _mm_add_epi64(addr0, hdroom);
+		addr1 = _mm_add_epi64(addr1, hdroom);
+
+#if RTE_IOVA_IN_MBUF
+		/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
+		addr0 = _mm_unpackhi_epi64(addr0, zero);
+		addr1 = _mm_unpackhi_epi64(addr1, zero);
+#else
+		/* erase Header Buffer Address */
+		addr0 = _mm_unpacklo_epi64(addr0, zero);
+		addr1 = _mm_unpacklo_epi64(addr1, zero);
+#endif
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[0]), addr0);
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[1]), addr1);
+	}
+}
+
+#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
+#ifdef __AVX2__
+/* AVX2 version for 16-byte descriptors, handles 4 buffers at a time */
+static __rte_always_inline void
+_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	const __m256i hdroom = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+	const __m256i zero = _mm256_setzero_si256();
+	volatile union ci_rx_desc *rxdp;
+	int i;
+
+	RTE_BUILD_BUG_ON(sizeof(union ci_rx_desc) != 16);
+
+	rxdp = &rxq->rx_ring[rxq->rxrearm_start];
+
+	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 4, rxp += 4, rxdp += 4) {
+		struct rte_mbuf *mb0 = rxp[0].mbuf;
+		struct rte_mbuf *mb1 = rxp[1].mbuf;
+		struct rte_mbuf *mb2 = rxp[2].mbuf;
+		struct rte_mbuf *mb3 = rxp[3].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		const __m128i vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		const __m128i vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+		const __m128i vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+		const __m128i vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+		/**
+		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+		 * into the high lanes. Similarly for 2 & 3
+		 */
+		const __m256i vaddr0_256 = _mm256_castsi128_si256(vaddr0);
+		const __m256i vaddr2_256 = _mm256_castsi128_si256(vaddr2);
+
+		__m256i addr0_1 = _mm256_inserti128_si256(vaddr0_256, vaddr1, 1);
+		__m256i addr2_3 = _mm256_inserti128_si256(vaddr2_256, vaddr3, 1);
+
+		/* add headroom to address values */
+		addr0_1 = _mm256_add_epi64(addr0_1, hdroom);
+		addr2_3 = _mm256_add_epi64(addr2_3, hdroom);
+
+#if RTE_IOVA_IN_MBUF
+		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+		addr0_1 = _mm256_unpackhi_epi64(addr0_1, zero);
+		addr2_3 = _mm256_unpackhi_epi64(addr2_3, zero);
+#else
+		/* erase Header Buffer Address */
+		addr0_1 = _mm256_unpacklo_epi64(addr0_1, zero);
+		addr2_3 = _mm256_unpacklo_epi64(addr2_3, zero);
+#endif
+
+		/* flush desc with pa dma_addr */
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, &rxdp[0]), addr0_1);
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, &rxdp[2]), addr2_3);
+	}
+}
+#endif /* __AVX2__ */
+
+#ifdef __AVX512VL__
+/* AVX512 version for 16-byte descriptors, handles 8 buffers at a time */
+static __rte_always_inline void
+_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	const __m512i hdroom = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+	const __m512i zero = _mm512_setzero_si512();
+	volatile union ci_rx_desc *rxdp;
+	int i;
+
+	RTE_BUILD_BUG_ON(sizeof(union ci_rx_desc) != 16);
+
+	rxdp = &rxq->rx_ring[rxq->rxrearm_start];
+
+	/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 8, rxp += 8, rxdp += 8) {
+		struct rte_mbuf *mb0 = rxp[0].mbuf;
+		struct rte_mbuf *mb1 = rxp[1].mbuf;
+		struct rte_mbuf *mb2 = rxp[2].mbuf;
+		struct rte_mbuf *mb3 = rxp[3].mbuf;
+		struct rte_mbuf *mb4 = rxp[4].mbuf;
+		struct rte_mbuf *mb5 = rxp[5].mbuf;
+		struct rte_mbuf *mb6 = rxp[6].mbuf;
+		struct rte_mbuf *mb7 = rxp[7].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		const __m128i vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		const __m128i vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+		const __m128i vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+		const __m128i vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+		const __m128i vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+		const __m128i vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+		const __m128i vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+		const __m128i vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+		/**
+		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+		 * into the high lanes. Similarly for 2 & 3, and so on.
+		 */
+		const __m256i addr0_256 = _mm256_castsi128_si256(vaddr0);
+		const __m256i addr2_256 = _mm256_castsi128_si256(vaddr2);
+		const __m256i addr4_256 = _mm256_castsi128_si256(vaddr4);
+		const __m256i addr6_256 = _mm256_castsi128_si256(vaddr6);
+
+		const __m256i addr0_1 = _mm256_inserti128_si256(addr0_256, vaddr1, 1);
+		const __m256i addr2_3 = _mm256_inserti128_si256(addr2_256, vaddr3, 1);
+		const __m256i addr4_5 = _mm256_inserti128_si256(addr4_256, vaddr5, 1);
+		const __m256i addr6_7 = _mm256_inserti128_si256(addr6_256, vaddr7, 1);
+
+		/**
+		 * merge 0_1 & 2_3, by casting 0_1 to 512-bit and inserting 2_3
+		 * into the high lanes. Similarly for 4_5 & 6_7, and so on.
+		 */
+		const __m512i addr0_1_512 = _mm512_castsi256_si512(addr0_1);
+		const __m512i addr4_5_512 = _mm512_castsi256_si512(addr4_5);
+
+		__m512i addr0_3 = _mm512_inserti64x4(addr0_1_512, addr2_3, 1);
+		__m512i addr4_7 = _mm512_inserti64x4(addr4_5_512, addr6_7, 1);
+
+		/* add headroom to address values */
+		addr0_3 = _mm512_add_epi64(addr0_3, hdroom);
+		addr4_7 = _mm512_add_epi64(addr4_7, hdroom);
+
+#if RTE_IOVA_IN_MBUF
+		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+		addr0_3 = _mm512_unpackhi_epi64(addr0_3, zero);
+		addr4_7 = _mm512_unpackhi_epi64(addr4_7, zero);
+#else
+		/* erase Header Buffer Address */
+		addr0_3 = _mm512_unpacklo_epi64(addr0_3, zero);
+		addr4_7 = _mm512_unpacklo_epi64(addr4_7, zero);
+#endif
+
+		/* flush desc with pa dma_addr */
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, &rxdp[0]), addr0_3);
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, &rxdp[4]), addr4_7);
+	}
+}
+#endif /* __AVX512VL__ */
+#endif /* RTE_NET_INTEL_USE_16BYTE_DESC */
+
+static __rte_always_inline void
+ci_rxq_rearm(struct ci_rx_queue *rxq, const enum ci_rx_vec_level vec_level)
+{
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	uint16_t rx_id;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (_ci_rxq_rearm_get_bufs(rxq) < 0)
+		return;
+
+#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
+	switch (vec_level) {
+	case CI_RX_VEC_LEVEL_AVX512:
+#ifdef __AVX512VL__
+		_ci_rxq_rearm_avx512(rxq);
+		break;
+#else
+		/* fall back to AVX2 */
+		/* fall through */
+#endif
+	case CI_RX_VEC_LEVEL_AVX2:
+#ifdef __AVX2__
+		_ci_rxq_rearm_avx2(rxq);
+		break;
+#else
+		/* fall back to SSE */
+		/* fall through */
+#endif
+	case CI_RX_VEC_LEVEL_SSE:
+		_ci_rxq_rearm_sse(rxq);
+		break;
+	}
+#else
+	/* for 32-byte descriptors only support SSE */
+	switch (vec_level) {
+	case CI_RX_VEC_LEVEL_AVX512:
+	case CI_RX_VEC_LEVEL_AVX2:
+	case CI_RX_VEC_LEVEL_SSE:
+		_ci_rxq_rearm_sse(rxq);
+		break;
+	}
+#endif /* RTE_NET_INTEL_USE_16BYTE_DESC */
+
+	rxq->rxrearm_start += rearm_thresh;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= rearm_thresh;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif /* _COMMON_INTEL_RX_VEC_X86_H_ */
diff --git a/drivers/net/intel/ice/ice_rxtx.h b/drivers/net/intel/ice/ice_rxtx.h
index 62f98579f5..aa81859ec0 100644
--- a/drivers/net/intel/ice/ice_rxtx.h
+++ b/drivers/net/intel/ice/ice_rxtx.h
@@ -28,12 +28,12 @@
 
 #define ICE_TD_CMD                      ICE_TX_DESC_CMD_EOP
 
-#define ICE_VPMD_RX_BURST           32
-#define ICE_VPMD_TX_BURST           32
-#define ICE_VPMD_RXQ_REARM_THRESH   64
-#define ICE_TX_MAX_FREE_BUF_SZ      64
-#define ICE_VPMD_DESCS_PER_LOOP      4
-#define ICE_VPMD_DESCS_PER_LOOP_WIDE 8
+#define ICE_VPMD_RX_BURST            CI_VPMD_RX_BURST
+#define ICE_VPMD_TX_BURST            32
+#define ICE_VPMD_RXQ_REARM_THRESH    CI_VPMD_RX_REARM_THRESH
+#define ICE_TX_MAX_FREE_BUF_SZ       64
+#define ICE_VPMD_DESCS_PER_LOOP      CI_VPMD_DESCS_PER_LOOP
+#define ICE_VPMD_DESCS_PER_LOOP_WIDE CI_VPMD_DESCS_PER_LOOP_WIDE
 
 #define ICE_FDIR_PKT_LEN	512
 
diff --git a/drivers/net/intel/ice/ice_rxtx_common_avx.h b/drivers/net/intel/ice/ice_rxtx_common_avx.h
deleted file mode 100644
index 7c65e7ed4d..0000000000
--- a/drivers/net/intel/ice/ice_rxtx_common_avx.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2019 Intel Corporation
- */
-
-#ifndef _ICE_RXTX_COMMON_AVX_H_
-#define _ICE_RXTX_COMMON_AVX_H_
-
-#include "ice_rxtx.h"
-
-#ifdef __AVX2__
-static __rte_always_inline void
-ice_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union ci_rx_flex_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = rxq->rx_flex_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 ICE_VPMD_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_VPMD_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_VPMD_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_VPMD_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-#if RTE_IOVA_IN_MBUF
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-#else
-		/* convert va to dma_addr hdr/data */
-		dma_addr0 = _mm_unpacklo_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpacklo_epi64(vaddr1, vaddr1);
-#endif
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-#else
-#ifdef __AVX512VL__
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < ICE_VPMD_RXQ_REARM_THRESH;
-				i += 8, rxep += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-			mb4 = rxep[4].mbuf;
-			mb5 = rxep[5].mbuf;
-			mb6 = rxep[6].mbuf;
-			mb7 = rxep[7].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-						   vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-						   vaddr6_7, 1);
-
-#if RTE_IOVA_IN_MBUF
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-#else
-			/* convert va to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpacklo_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpacklo_epi64(vaddr4_7, vaddr4_7);
-#endif
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *, &rxdp->read), dma_addr0_3);
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *, &(rxdp + 4)->read), dma_addr4_7);
-		}
-	} else
-#endif /* __AVX512VL__ */
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < ICE_VPMD_RXQ_REARM_THRESH;
-				i += 4, rxep += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-
-#if RTE_IOVA_IN_MBUF
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-#else
-			/* convert va to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpacklo_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpacklo_epi64(vaddr2_3, vaddr2_3);
-#endif
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *, &rxdp->read), dma_addr0_1);
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *, &(rxdp + 2)->read), dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += ICE_VPMD_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= ICE_VPMD_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif /* __AVX2__ */
-
-#endif /* _ICE_RXTX_COMMON_AVX_H_ */
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
index 5b1a13dd22..b952b8dddc 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
@@ -3,14 +3,15 @@
  */
 
 #include "ice_rxtx_vec_common.h"
-#include "ice_rxtx_common_avx.h"
+
+#include "../common/rx_vec_x86.h"
 
 #include <rte_vect.h>
 
 static __rte_always_inline void
 ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	ice_rxq_rearm_common(rxq, false);
+	ci_rxq_rearm(rxq, CI_RX_VEC_LEVEL_AVX2);
 }
 
 static __rte_always_inline __m256i
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
index b943caf0f0..7c6fe82072 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
@@ -3,14 +3,15 @@
  */
 
 #include "ice_rxtx_vec_common.h"
-#include "ice_rxtx_common_avx.h"
+
+#include "../common/rx_vec_x86.h"
 
 #include <rte_vect.h>
 
 static __rte_always_inline void
 ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	ice_rxq_rearm_common(rxq, true);
+	ci_rxq_rearm(rxq, CI_RX_VEC_LEVEL_AVX512);
 }
 
 static inline __m256i
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_sse.c b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
index cae2188279..d818b3b728 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
@@ -4,6 +4,8 @@
 
 #include "ice_rxtx_vec_common.h"
 
+#include "../common/rx_vec_x86.h"
+
 #include <rte_vect.h>
 
 static inline __m128i
@@ -28,80 +30,7 @@ ice_flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 static inline void
 ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ci_rx_flex_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-					  RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	rxdp = rxq->rx_flex_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 ICE_VPMD_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_VPMD_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_VPMD_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_VPMD_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				 offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-#if RTE_IOVA_IN_MBUF
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-#else
-		/* convert va to dma_addr hdr/data */
-		dma_addr0 = _mm_unpacklo_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpacklo_epi64(vaddr1, vaddr1);
-#endif
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += ICE_VPMD_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= ICE_VPMD_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, CI_RX_VEC_LEVEL_SSE);
 }
 
 static inline void
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 20/25] net/i40e: use common Rx rearm code
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (18 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 19/25] net/intel: generalize vectorized Rx rearm Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 21/25] net/iavf: " Anatoly Burakov
                     ` (4 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes

The i40e driver has an implementation of vectorized mbuf rearm code that
is identical to the one now in the common code, so just use the common
version.

In addition, the i40e driver has implementations of Rx queue rearm for the
Neon and AltiVec instruction sets, so create a common header for each of
those instruction sets, and use them in the respective i40e code.

While we're at it, also make sure to use common definitions for things like
burst size, rearm threshold, and descriptors per loop, which are currently
defined separately in each driver.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Rename rx_vec_neon.h to rx_vec_arm.h
    - Use the common descriptor format instead of constant propagation
    - Use the new unified definitions for burst size, rearm threshold, and descriptors per loop
    - Whitespace and variable name cleanups for vector code
    - Added missing implementation for PPC and put it in rx_vec_ppc.h
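
    As a reviewer reference, the Arm/PPC headers expose a single-level helper
    (there is no ci_rx_vec_level argument), so the per-driver rearm wrappers
    reduce to a one-line call. A minimal sketch, following the ice pattern
    from the previous patch (the exact i40e hunks are in the diff below):

        #include "../common/rx_vec_arm.h"

        static inline void
        i40e_rxq_rearm(struct ci_rx_queue *rxq)
        {
                /* Neon/AltiVec have a single code path, hence no level argument */
                ci_rxq_rearm(rxq);
        }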

 drivers/net/intel/common/rx_vec_arm.h         | 105 +++++++++
 drivers/net/intel/common/rx_vec_ppc.h         | 121 ++++++++++
 drivers/net/intel/i40e/i40e_rxtx.h            |   8 +-
 drivers/net/intel/i40e/i40e_rxtx_common_avx.h | 215 ------------------
 .../net/intel/i40e/i40e_rxtx_vec_altivec.c    |  83 +------
 drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |   5 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |   5 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   |  59 +----
 drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    |  70 +-----
 9 files changed, 245 insertions(+), 426 deletions(-)
 create mode 100644 drivers/net/intel/common/rx_vec_arm.h
 create mode 100644 drivers/net/intel/common/rx_vec_ppc.h
 delete mode 100644 drivers/net/intel/i40e/i40e_rxtx_common_avx.h

diff --git a/drivers/net/intel/common/rx_vec_arm.h b/drivers/net/intel/common/rx_vec_arm.h
new file mode 100644
index 0000000000..2e48d4b6c0
--- /dev/null
+++ b/drivers/net/intel/common/rx_vec_arm.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2025 Intel Corporation
+ */
+
+#ifndef _COMMON_INTEL_RX_VEC_ARM_H_
+#define _COMMON_INTEL_RX_VEC_ARM_H_
+
+#include <stdint.h>
+
+#include <ethdev_driver.h>
+#include <rte_io.h>
+#include <rte_vect.h>
+
+#include "rx.h"
+
+static inline int
+_ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	volatile union ci_rx_desc *rxdp;
+	int i;
+
+	rxdp = &rxq->rx_ring[rxq->rxrearm_start];
+
+	if (rte_mempool_get_bulk(rxq->mp, (void **)rxp, rearm_thresh) < 0) {
+		if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) {
+			uint64x2_t zero = vdupq_n_u64(0);
+
+			for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) {
+				rxp[i].mbuf = &rxq->fake_mbuf;
+				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i]), zero);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += rearm_thresh;
+		return -1;
+	}
+	return 0;
+}
+
+static __rte_always_inline void
+_ci_rxq_rearm_neon(struct ci_rx_queue *rxq)
+{
+	const uint64x2_t zero = vdupq_n_u64(0);
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile union ci_rx_desc *rxdp;
+	int i;
+
+	const uint8x8_t mbuf_init = vld1_u8((uint8_t *)&rxq->mbuf_initializer);
+
+	rxdp = &rxq->rx_ring[rxq->rxrearm_start];
+
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp += 2) {
+		struct rte_mbuf *mb0 = rxp[0].mbuf;
+		struct rte_mbuf *mb1 = rxp[1].mbuf;
+
+		/*
+		 * Flush mbuf with pkt template.
+		 * Data to be rearmed is 6 bytes long.
+		 */
+		vst1_u8((uint8_t *)&mb0->rearm_data, mbuf_init);
+		vst1_u8((uint8_t *)&mb1->rearm_data, mbuf_init);
+#if RTE_IOVA_IN_MBUF
+		const uint64_t addr0 = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
+		const uint64_t addr1 = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
+#else
+		const uint64_t addr0 = (uintptr_t)RTE_PTR_ADD(mb0->buf_addr, RTE_PKTMBUF_HEADROOM);
+		const uint64_t addr1 = (uintptr_t)RTE_PTR_ADD(mb1->buf_addr, RTE_PKTMBUF_HEADROOM);
+#endif
+		uint64x2_t dma_addr0 = vsetq_lane_u64(addr0, zero, 0);
+		uint64x2_t dma_addr1 = vsetq_lane_u64(addr1, zero, 0);
+		/* flush desc with pa dma_addr */
+		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[0]), dma_addr0);
+		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[1]), dma_addr1);
+	}
+}
+
+static __rte_always_inline void
+ci_rxq_rearm(struct ci_rx_queue *rxq)
+{
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	uint16_t rx_id;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (_ci_rxq_rearm_get_bufs(rxq) < 0)
+		return;
+
+	_ci_rxq_rearm_neon(rxq);
+
+	rxq->rxrearm_start += rearm_thresh;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= rearm_thresh;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif /* _COMMON_INTEL_RX_VEC_ARM_H_ */
diff --git a/drivers/net/intel/common/rx_vec_ppc.h b/drivers/net/intel/common/rx_vec_ppc.h
new file mode 100644
index 0000000000..e41266d028
--- /dev/null
+++ b/drivers/net/intel/common/rx_vec_ppc.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2025 Intel Corporation
+ */
+
+#ifndef _COMMON_INTEL_RX_VEC_PPC_H_
+#define _COMMON_INTEL_RX_VEC_PPC_H_
+
+#include <stdint.h>
+
+#include <ethdev_driver.h>
+#include <rte_io.h>
+#include <rte_altivec.h>
+
+#include "rx.h"
+
+static inline int
+_ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	volatile union ci_rx_desc *rxdp;
+	int i;
+
+	rxdp = &rxq->rx_ring[rxq->rxrearm_start];
+
+	if (rte_mempool_get_bulk(rxq->mp, (void *)rxep, rearm_thresh) < 0) {
+		if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) {
+			__vector unsigned long dma_addr0 = (__vector unsigned long){};
+
+			for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) {
+				rxep[i].mbuf = &rxq->fake_mbuf;
+				vec_st(dma_addr0, 0,
+					RTE_CAST_PTR(__vector unsigned long *, &rxdp[i]));
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += rearm_thresh;
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * The Altivec code path can handle both 16-byte and 32-byte descriptors with
+ * one code path, as we only ever write 16 bytes at a time.
+ */
+static __rte_always_inline void
+_ci_rxq_rearm_altivec(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	__vector unsigned long hdroom =
+			(__vector unsigned long){RTE_PKTMBUF_HEADROOM, RTE_PKTMBUF_HEADROOM};
+	int i;
+
+	volatile union ci_rx_desc *rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 2, rxep += 2) {
+		__vector unsigned long vaddr0, vaddr1;
+		struct rte_mbuf *mb0 = rxep[0].mbuf;
+		struct rte_mbuf *mb1 = rxep[1].mbuf;
+
+		/* Flush mbuf with pkt template.
+		 * Data to be rearmed is 6 bytes long.
+		 * Though, RX will overwrite ol_flags that are coming next
+		 * anyway. So overwrite whole 8 bytes with one load:
+		 * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
+		 */
+		*(uint64_t *)&mb0->rearm_data = rxq->mbuf_initializer;
+		*(uint64_t *)&mb1->rearm_data = rxq->mbuf_initializer;
+
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		vaddr0 = vec_ld(0, (__vector unsigned long *)&mb0->buf_addr);
+		vaddr1 = vec_ld(0, (__vector unsigned long *)&mb1->buf_addr);
+
+#if RTE_IOVA_IN_MBUF
+		/* convert pa to dma_addr hdr/data */
+		vaddr0 = vec_mergel(vaddr0, vaddr0);
+		vaddr1 = vec_mergel(vaddr1, vaddr1);
+#else
+		/* convert va to dma_addr hdr/data */
+		vaddr0 = vec_mergeh(vaddr0, vaddr0);
+		vaddr1 = vec_mergeh(vaddr1, vaddr1);
+#endif
+
+		/* add headroom to pa values */
+		vaddr0 = vec_add(vaddr0, hdroom);
+		vaddr1 = vec_add(vaddr1, hdroom);
+
+		/* flush desc with pa dma_addr */
+		vec_st(vaddr0, 0, RTE_CAST_PTR(__vector unsigned long *, rxdp++));
+		vec_st(vaddr1, 0, RTE_CAST_PTR(__vector unsigned long *, rxdp++));
+	}
+}
+
+static __rte_always_inline void
+ci_rxq_rearm(struct ci_rx_queue *rxq)
+{
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	uint16_t rx_id;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (_ci_rxq_rearm_get_bufs(rxq) < 0)
+		return;
+
+	_ci_rxq_rearm_altivec(rxq);
+
+	rxq->rxrearm_start += rearm_thresh;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= rearm_thresh;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			(rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif /* _COMMON_INTEL_RX_VEC_PPC_H_ */
diff --git a/drivers/net/intel/i40e/i40e_rxtx.h b/drivers/net/intel/i40e/i40e_rxtx.h
index 05c41d473e..984532c507 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx.h
@@ -11,11 +11,11 @@
 #define I40E_RX_MAX_BURST CI_RX_MAX_BURST
 #define I40E_TX_MAX_BURST 32
 
-#define I40E_VPMD_RX_BURST            32
-#define I40E_VPMD_RXQ_REARM_THRESH    32
+#define I40E_VPMD_RX_BURST            CI_VPMD_RX_BURST
+#define I40E_VPMD_RXQ_REARM_THRESH    CI_VPMD_RX_REARM_THRESH
 #define I40E_TX_MAX_FREE_BUF_SZ       64
-#define I40E_VPMD_DESCS_PER_LOOP      4
-#define I40E_VPMD_DESCS_PER_LOOP_WIDE 8
+#define I40E_VPMD_DESCS_PER_LOOP      CI_VPMD_DESCS_PER_LOOP
+#define I40E_VPMD_DESCS_PER_LOOP_WIDE CI_VPMD_DESCS_PER_LOOP_WIDE
 
 #define I40E_RXBUF_SZ_1024 1024
 #define I40E_RXBUF_SZ_2048 2048
diff --git a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
deleted file mode 100644
index 97cf5226f6..0000000000
--- a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
+++ /dev/null
@@ -1,215 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2015 Intel Corporation
- */
-
-#ifndef _I40E_RXTX_COMMON_AVX_H_
-#define _I40E_RXTX_COMMON_AVX_H_
-#include <stdint.h>
-#include <ethdev_driver.h>
-#include <rte_malloc.h>
-
-#include "i40e_ethdev.h"
-#include "i40e_rxtx.h"
-
-#ifdef __AVX2__
-static __rte_always_inline void
-i40e_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union ci_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 I40E_VPMD_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + I40E_VPMD_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < I40E_VPMD_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			I40E_VPMD_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-#else
-#ifdef __AVX512VL__
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH;
-				i += 8, rxep += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-			mb4 = rxep[4].mbuf;
-			mb5 = rxep[5].mbuf;
-			mb6 = rxep[6].mbuf;
-			mb7 = rxep[7].mbuf;
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-						   vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-						   vaddr6_7, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *,
-					&rxdp->read), dma_addr0_3);
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *,
-					&(rxdp + 4)->read), dma_addr4_7);
-		}
-	} else
-#endif /* __AVX512VL__*/
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH;
-				i += 4, rxep += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 = _mm256_inserti128_si256
-				(_mm256_castsi128_si256(vaddr0), vaddr1, 1);
-			vaddr2_3 = _mm256_inserti128_si256
-				(_mm256_castsi128_si256(vaddr2), vaddr3, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *,
-					&rxdp->read), dma_addr0_1);
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *,
-					&(rxdp + 2)->read), dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += I40E_VPMD_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= I40E_VPMD_RXQ_REARM_THRESH;
-
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif /* __AVX2__*/
-
-#endif /*_I40E_RXTX_COMMON_AVX_H_*/
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
index a914ef20f4..8a4a1a77bf 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
@@ -13,91 +13,14 @@
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
 
+#include "../common/rx_vec_ppc.h"
+
 #include <rte_altivec.h>
 
 static inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ci_rx_desc *rxdp;
-
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-
-	__vector unsigned long hdr_room = (__vector unsigned long){
-						RTE_PKTMBUF_HEADROOM,
-						RTE_PKTMBUF_HEADROOM};
-	__vector unsigned long dma_addr0, dma_addr1;
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 I40E_VPMD_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + I40E_VPMD_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = (__vector unsigned long){};
-			for (i = 0; i < I40E_VPMD_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				vec_st(dma_addr0, 0,
-					RTE_CAST_PTR(__vector unsigned long *, &rxdp[i].read));
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			I40E_VPMD_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__vector unsigned long vaddr0, vaddr1;
-		uintptr_t p0, p1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		 /* Flush mbuf with pkt template.
-		  * Data to be rearmed is 6 bytes long.
-		  * Though, RX will overwrite ol_flags that are coming next
-		  * anyway. So overwrite whole 8 bytes with one load:
-		  * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
-		  */
-		p0 = (uintptr_t)&mb0->rearm_data;
-		*(uint64_t *)p0 = rxq->mbuf_initializer;
-		p1 = (uintptr_t)&mb1->rearm_data;
-		*(uint64_t *)p1 = rxq->mbuf_initializer;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		vaddr0 = vec_ld(0, (__vector unsigned long *)&mb0->buf_addr);
-		vaddr1 = vec_ld(0, (__vector unsigned long *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = vec_mergel(vaddr0, vaddr0);
-		dma_addr1 = vec_mergel(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = vec_add(dma_addr0, hdr_room);
-		dma_addr1 = vec_add(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		vec_st(dma_addr0, 0, RTE_CAST_PTR(__vector unsigned long *, &rxdp++->read));
-		vec_st(dma_addr1, 0, RTE_CAST_PTR(__vector unsigned long *, &rxdp++->read));
-	}
-
-	rxq->rxrearm_start += I40E_VPMD_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= I40E_VPMD_RXQ_REARM_THRESH;
-
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq);
 }
 
 static inline void
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index fee2a6e670..aeb2756e7a 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -11,14 +11,15 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
-#include "i40e_rxtx_common_avx.h"
+
+#include "../common/rx_vec_x86.h"
 
 #include <rte_vect.h>
 
 static __rte_always_inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	i40e_rxq_rearm_common(rxq, false);
+	ci_rxq_rearm(rxq, CI_RX_VEC_LEVEL_AVX2);
 }
 
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index e609b7c411..571987d27a 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -11,14 +11,15 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
-#include "i40e_rxtx_common_avx.h"
+
+#include "../common/rx_vec_x86.h"
 
 #include <rte_vect.h>
 
 static __rte_always_inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	i40e_rxq_rearm_common(rxq, true);
+	ci_rxq_rearm(rxq, CI_RX_VEC_LEVEL_AVX512);
 }
 
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index 02ba03c290..64ffb2f6df 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -16,65 +16,12 @@
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
 
+#include "../common/rx_vec_arm.h"
+
 static inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ci_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	uint64x2_t dma_addr0, dma_addr1;
-	uint64x2_t zero = vdupq_n_u64(0);
-	uint64_t paddr;
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  I40E_VPMD_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + I40E_VPMD_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < I40E_VPMD_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read), zero);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			I40E_VPMD_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr0 = vdupq_n_u64(paddr);
-
-		/* flush desc with pa dma_addr */
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr0);
-
-		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr1 = vdupq_n_u64(paddr);
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += I40E_VPMD_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= I40E_VPMD_RXQ_REARM_THRESH;
-
-	rte_io_wmb();
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq);
 }
 
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 6bafd96797..15cf07e548 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -12,78 +12,14 @@
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
 
+#include "../common/rx_vec_x86.h"
+
 #include <rte_vect.h>
 
 static inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ci_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 I40E_VPMD_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + I40E_VPMD_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < I40E_VPMD_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			I40E_VPMD_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < I40E_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += I40E_VPMD_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= I40E_VPMD_RXQ_REARM_THRESH;
-
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, CI_RX_VEC_LEVEL_SSE);
 }
 
 #ifndef RTE_NET_INTEL_USE_16BYTE_DESC
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 21/25] net/iavf: use common Rx rearm code
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (19 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 20/25] net/i40e: use common Rx rearm code Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 22/25] net/ixgbe: " Anatoly Burakov
                     ` (3 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin, Ian Stokes; +Cc: bruce.richardson

The iavf driver has implementations of vectorized mbuf rearm code that
are identical to those in the common code, so just use the common versions.

While we're at it, also make sure to use common definitions for things like
burst size, rearm threshold, and descriptors per loop, which are currently
defined separately in each driver.
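
For reference, a minimal sketch of the shared definitions assumed to live in
drivers/net/intel/common/rx.h is shown below. The values mirror the per-driver
macros being replaced; the exact contents of the common header are not part of
this patch:

	/* sketch only -- values taken from the old per-driver defines */
	#define CI_VPMD_RX_BURST             32
	#define CI_VPMD_RX_REARM_THRESH      32
	#define CI_VPMD_DESCS_PER_LOOP       4
	#define CI_VPMD_DESCS_PER_LOOP_WIDE  8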

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Use the common descriptor format instead of constant propagation
    - Use the new unified definitions for burst size, rearm threshold, and descriptors per loop

 drivers/net/intel/iavf/iavf_rxtx.h            |   8 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   |   3 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c |   3 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_common.h | 199 ------------------
 drivers/net/intel/iavf/iavf_rxtx_vec_neon.c   |  58 +----
 drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    |  72 +------
 6 files changed, 13 insertions(+), 330 deletions(-)

diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index 98abebae90..df5164f534 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -23,11 +23,11 @@
 #define IAVF_RX_MAX_DATA_BUF_SIZE (16 * 1024 - 128)
 
 /* used for Vector PMD */
-#define IAVF_VPMD_RX_BURST             32
+#define IAVF_VPMD_RX_BURST             CI_VPMD_RX_BURST
 #define IAVF_VPMD_TX_BURST             32
-#define IAVF_VPMD_RXQ_REARM_THRESH     32
-#define IAVF_VPMD_DESCS_PER_LOOP       4
-#define IAVF_VPMD_DESCS_PER_LOOP_WIDE  8
+#define IAVF_VPMD_RXQ_REARM_THRESH     CI_VPMD_RX_REARM_THRESH
+#define IAVF_VPMD_DESCS_PER_LOOP       CI_VPMD_DESCS_PER_LOOP
+#define IAVF_VPMD_DESCS_PER_LOOP_WIDE  CI_VPMD_DESCS_PER_LOOP_WIDE
 #define IAVF_VPMD_TX_MAX_FREE_BUF      64
 
 #define IAVF_TX_NO_VECTOR_FLAGS (				 \
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
index 319f0166ce..ca118ea02e 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
@@ -2,6 +2,7 @@
  * Copyright(c) 2019 Intel Corporation
  */
 
+#include "../common/rx_vec_x86.h"
 #include "iavf_rxtx_vec_common.h"
 
 #include <rte_vect.h>
@@ -9,7 +10,7 @@
 static __rte_always_inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	iavf_rxq_rearm_common(rxq, false);
+	ci_rxq_rearm(rxq, CI_RX_VEC_LEVEL_AVX2);
 }
 
 #define PKTLEN_SHIFT     10
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
index d2aeccf5e6..01ec8ddd10 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
@@ -2,6 +2,7 @@
  * Copyright(c) 2020 Intel Corporation
  */
 
+#include "../common/rx_vec_x86.h"
 #include "iavf_rxtx_vec_common.h"
 
 #include <rte_vect.h>
@@ -29,7 +30,7 @@
 static __rte_always_inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	iavf_rxq_rearm_common(rxq, true);
+	ci_rxq_rearm(rxq, CI_RX_VEC_LEVEL_AVX512);
 }
 
 #define IAVF_RX_LEN_MASK 0x80808080
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
index e98551e1fb..335e7e9af5 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
@@ -237,203 +237,4 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 	*txd_hi |= ((uint64_t)td_cmd) << IAVF_TXD_QW1_CMD_SHIFT;
 }
 
-#ifdef RTE_ARCH_X86
-static __rte_always_inline void
-iavf_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union ci_rx_desc *rxdp;
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxp,
-				 IAVF_VPMD_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + IAVF_VPMD_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IAVF_VPMD_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IAVF_VPMD_RXQ_REARM_THRESH; i += 2, rxp += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-#else
-#ifdef CC_AVX512_SUPPORT
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < IAVF_VPMD_RXQ_REARM_THRESH;
-				i += 8, rxp += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxp[0];
-			mb1 = rxp[1];
-			mb2 = rxp[2];
-			mb3 = rxp[3];
-			mb4 = rxp[4];
-			mb5 = rxp[5];
-			mb6 = rxp[6];
-			mb7 = rxp[7];
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-							vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-							vaddr6_7, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
-			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
-		}
-	} else
-#endif
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < IAVF_VPMD_RXQ_REARM_THRESH;
-				i += 4, rxp += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxp[0];
-			mb1 = rxp[1];
-			mb2 = rxp[2];
-			mb3 = rxp[3];
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += IAVF_VPMD_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= IAVF_VPMD_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif
-
 #endif
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
index 562e574aab..4ed4e9b336 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
@@ -14,64 +14,12 @@
 #include "iavf_rxtx.h"
 #include "iavf_rxtx_vec_common.h"
 
+#include "../common/rx_vec_arm.h"
+
 static inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ci_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	uint64x2_t dma_addr0, dma_addr1;
-	uint64x2_t zero = vdupq_n_u64(0);
-	uint64_t paddr;
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  IAVF_VPMD_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + IAVF_VPMD_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read), zero);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IAVF_VPMD_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IAVF_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr0 = vdupq_n_u64(paddr);
-
-		/* flush desc with pa dma_addr */
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr0);
-
-		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr1 = vdupq_n_u64(paddr);
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += IAVF_VPMD_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= IAVF_VPMD_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	rte_io_wmb();
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq);
 }
 
 static inline void
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
index 8bbcf836b7..c6589ecd81 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
@@ -9,82 +9,14 @@
 #include "iavf.h"
 #include "iavf_rxtx.h"
 #include "iavf_rxtx_vec_common.h"
+#include "../common/rx_vec_x86.h"
 
 #include <rte_vect.h>
 
 static inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-
-	volatile union ci_rx_desc *rxdp;
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp, (void *)rxp,
-				 rxq->rx_free_thresh) < 0) {
-		if (rxq->rxrearm_nb + rxq->rx_free_thresh >= rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			rxq->rx_free_thresh;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < rxq->rx_free_thresh; i += 2, rxp += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += rxq->rx_free_thresh;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= rxq->rx_free_thresh;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-		   "rearm_start=%u rearm_nb=%u",
-		   rxq->port_id, rxq->queue_id,
-		   rx_id, rxq->rxrearm_start, rxq->rxrearm_nb);
-
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, CI_RX_VEC_LEVEL_SSE);
 }
 
 static inline void
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 22/25] net/ixgbe: use common Rx rearm code
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (20 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 21/25] net/iavf: " Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 23/25] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
                     ` (2 subsequent siblings)
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin; +Cc: bruce.richardson

The ixgbe driver has implementations of vectorized mbuf rearm code that
are identical to those in the common code, so just use the common versions.

Since ixgbe Rx descriptors are always 16 bytes wide, force the use of 16-byte
definitions in the common headers with a define flag.
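
To illustrate why the flag matters, below is a rough sketch of how the common
header is assumed to select the descriptor layout (field names are
hypothetical, not the actual common header contents):

	/* sketch only -- assumed layout selection in the common Rx header */
	#include <stdint.h>

	#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
	union ci_rx_desc {
		struct {
			uint64_t pkt_addr; /* packet buffer address */
			uint64_t hdr_addr; /* header buffer address */
		} read;
		/* ... 16-byte writeback format ... */
	};
	#else
	union ci_rx_desc {
		struct {
			uint64_t pkt_addr;
			uint64_t hdr_addr;
			uint64_t rsvd1;
			uint64_t rsvd2;
		} read;
		/* ... 32-byte writeback format ... */
	};
	#endif

With the flag forced on, sizeof(union ci_rx_desc) is 16 and matches
sizeof(union ixgbe_adv_rx_desc), which the RTE_BUILD_BUG_ON checks added to
the rearm functions below verify at build time.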

While we're at it, also make sure to use common definitions for things like
burst size, rearm threshold, and descriptors per loop, which are currently
defined separately in each driver.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Use the common descriptor format instead of constant propagation
    - Use some definitional hackery to work around IXGBE descriptors always being 16-byte
    - Use the new unified definitions for burst size, rearm threshold, and descriptors per loop

 drivers/net/intel/ixgbe/ixgbe_rxtx.h          | 21 ++++-
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c | 67 +---------------
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c  | 76 +------------------
 3 files changed, 26 insertions(+), 138 deletions(-)

diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index d1847a33dd..0640336156 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -5,9 +5,24 @@
 #ifndef _IXGBE_RXTX_H_
 #define _IXGBE_RXTX_H_
 
+/*
+ * For IXGBE, descriptor size is always 16 bytes, so in order to have all
+ * vectorized and common code building correctly and with proper offsets, force
+ * the common parts to consider IXGBE descriptors to be 16-bytes in size.
+ */
+#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
+#define IXGBE_FORCE_16BYTE_DESC
+#define RTE_NET_INTEL_USE_16BYTE_DESC
+#endif
+
 #include "../common/rx.h"
 #include "../common/tx.h"
 
+#ifdef IXGBE_FORCE_16BYTE_DESC
+#undef RTE_NET_INTEL_USE_16BYTE_DESC
+#undef IXGBE_FORCE_16BYTE_DESC
+#endif
+
 /*
  * Rings setup and release.
  *
@@ -34,10 +49,10 @@
 #define IXGBE_RX_MAX_BURST            CI_RX_MAX_BURST
 #define IXGBE_TX_MAX_FREE_BUF_SZ      64
 
-#define IXGBE_VPMD_DESCS_PER_LOOP     4
+#define IXGBE_VPMD_DESCS_PER_LOOP     CI_VPMD_DESCS_PER_LOOP
 
-#define IXGBE_VPMD_RXQ_REARM_THRESH   32
-#define IXGBE_VPMD_RX_BURST           IXGBE_VPMD_RXQ_REARM_THRESH
+#define IXGBE_VPMD_RXQ_REARM_THRESH   CI_VPMD_RX_REARM_THRESH
+#define IXGBE_VPMD_RX_BURST           CI_VPMD_RX_BURST
 
 #define RX_RING_SZ ((IXGBE_MAX_RING_DESC + IXGBE_RX_MAX_BURST) * \
 		    sizeof(union ixgbe_adv_rx_desc))
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
index 82c655e769..481ff61c60 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -11,72 +11,13 @@
 #include "ixgbe_rxtx.h"
 #include "ixgbe_rxtx_vec_common.h"
 
+#include "../common/rx_vec_arm.h"
+
 static inline void
 ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	uint64x2_t dma_addr0, dma_addr1;
-	uint64x2_t zero = vdupq_n_u64(0);
-	uint64_t paddr;
-	uint8x8_t p;
-
-	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  IXGBE_VPMD_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + IXGBE_VPMD_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < IXGBE_VPMD_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read),
-					  zero);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IXGBE_VPMD_RXQ_REARM_THRESH;
-		return;
-	}
-
-	p = vld1_u8((uint8_t *)&rxq->mbuf_initializer);
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IXGBE_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/*
-		 * Flush mbuf with pkt template.
-		 * Data to be rearmed is 6 bytes long.
-		 */
-		vst1_u8((uint8_t *)&mb0->rearm_data, p);
-		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr0 = vsetq_lane_u64(paddr, zero, 0);
-		/* flush desc with pa dma_addr */
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr0);
-
-		vst1_u8((uint8_t *)&mb1->rearm_data, p);
-		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr1 = vsetq_lane_u64(paddr, zero, 0);
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += IXGBE_VPMD_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= IXGBE_VPMD_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	RTE_BUILD_BUG_ON(sizeof(union ci_rx_desc) != sizeof(union ixgbe_adv_rx_desc));
+	ci_rxq_rearm(rxq);
 }
 
 static inline void
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
index f6aa3f9f9a..03d16e7b36 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -10,83 +10,15 @@
 #include "ixgbe_rxtx.h"
 #include "ixgbe_rxtx_vec_common.h"
 
+#include "../common/rx_vec_x86.h"
+
 #include <rte_vect.h>
 
 static inline void
 ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	const __m128i hba_msk = _mm_set_epi64x(0, UINT64_MAX);
-
-	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 IXGBE_VPMD_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + IXGBE_VPMD_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < IXGBE_VPMD_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IXGBE_VPMD_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IXGBE_VPMD_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr));
-		vaddr1 = _mm_loadu_si128((__m128i *)&(mb1->buf_addr));
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* set Header Buffer Address to zero */
-		dma_addr0 =  _mm_and_si128(dma_addr0, hba_msk);
-		dma_addr1 =  _mm_and_si128(dma_addr1, hba_msk);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += IXGBE_VPMD_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= IXGBE_VPMD_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t) ((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	RTE_BUILD_BUG_ON(sizeof(union ci_rx_desc) != sizeof(union ixgbe_adv_rx_desc));
+	ci_rxq_rearm(rxq, CI_RX_VEC_LEVEL_SSE);
 }
 
 #ifdef RTE_LIB_SECURITY
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 23/25] net/intel: support wider x86 vectors for Rx rearm
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (21 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 22/25] net/ixgbe: " Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 24/25] net/intel: add common Rx mbuf recycle Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 25/25] net/intel: add common Tx " Anatoly Burakov
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Bruce Richardson

Currently, for the 32-byte descriptor format, only the SSE instruction set is
supported. Add implementations for the AVX2 and AVX512 instruction sets. Since
we are using Rx descriptor definitions from the common code, we can just use
the generic descriptor definition, as we only ever write the first 16 bytes
of it, and the layout is always the same for that part.
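
As a standalone sanity check of the register-packing arithmetic used below
(assuming only that descriptors are either 16 or 32 bytes wide):

	#include <assert.h>

	int main(void)
	{
		const unsigned int avx2_reg_bytes = 32, avx512_reg_bytes = 64;

		/* 16-byte descriptors: 2 per AVX2 register, 4 per AVX512 register */
		assert(avx2_reg_bytes / 16 == 2 && avx512_reg_bytes / 16 == 4);
		/* 32-byte descriptors: 1 per AVX2 register, 2 per AVX512 register */
		assert(avx2_reg_bytes / 32 == 1 && avx512_reg_bytes / 32 == 2);

		/* each iteration fills two registers, so desc_per_iter = desc_per_reg * 2 */
		return 0;
	}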

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Use the common descriptor format instead of constant propagation
    - Syntax and whitespace cleanups

 drivers/net/intel/common/rx_vec_x86.h | 339 ++++++++++++++------------
 1 file changed, 183 insertions(+), 156 deletions(-)

diff --git a/drivers/net/intel/common/rx_vec_x86.h b/drivers/net/intel/common/rx_vec_x86.h
index 7c57016df7..43f7c59449 100644
--- a/drivers/net/intel/common/rx_vec_x86.h
+++ b/drivers/net/intel/common/rx_vec_x86.h
@@ -43,206 +43,244 @@ _ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq)
 	return 0;
 }
 
-/*
- * SSE code path can handle both 16-byte and 32-byte descriptors with one code
- * path, as we only ever write 16 bytes at a time.
- */
-static __rte_always_inline void
-_ci_rxq_rearm_sse(struct ci_rx_queue *rxq)
+static __rte_always_inline __m128i
+_ci_rxq_rearm_desc_sse(const __m128i vaddr)
 {
 	const __m128i hdroom = _mm_set1_epi64x(RTE_PKTMBUF_HEADROOM);
 	const __m128i zero = _mm_setzero_si128();
+
+	/* add headroom to address values */
+	__m128i reg = _mm_add_epi64(vaddr, hdroom);
+
+#if RTE_IOVA_IN_MBUF
+	/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+			 offsetof(struct rte_mbuf, buf_addr) + 8);
+	/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
+	reg = _mm_unpackhi_epi64(reg, zero);
+#else
+	/* erase Header Buffer Address */
+	reg = _mm_unpacklo_epi64(reg, zero);
+#endif
+	return reg;
+}
+
+static __rte_always_inline void
+_ci_rxq_rearm_sse(struct ci_rx_queue *rxq)
+{
 	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
 	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	/* SSE writes 16-bytes regardless of descriptor size */
+	const uint8_t desc_per_reg = 1;
+	const uint8_t desc_per_iter = desc_per_reg * 2;
 	volatile union ci_rx_desc *rxdp;
 	int i;
 
 	rxdp = &rxq->rx_ring[rxq->rxrearm_start];
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp += 2) {
+	for (i = 0; i < rearm_thresh;
+			i += desc_per_iter,
+			rxp += desc_per_iter,
+			rxdp += desc_per_iter) {
 		struct rte_mbuf *mb0 = rxp[0].mbuf;
 		struct rte_mbuf *mb1 = rxp[1].mbuf;
 
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		__m128i addr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		__m128i addr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+		const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+		const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
 
-		/* add headroom to address values */
-		addr0 = _mm_add_epi64(addr0, hdroom);
-		addr1 = _mm_add_epi64(addr1, hdroom);
+		const __m128i reg0 = _ci_rxq_rearm_desc_sse(vaddr0);
+		const __m128i reg1 = _ci_rxq_rearm_desc_sse(vaddr1);
 
-#if RTE_IOVA_IN_MBUF
-		/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
-		addr0 = _mm_unpackhi_epi64(addr0, zero);
-		addr0 = _mm_unpackhi_epi64(addr1, zero);
-#else
-		/* erase Header Buffer Address */
-		addr0 = _mm_unpacklo_epi64(addr0, zero);
-		addr1 = _mm_unpacklo_epi64(addr1, zero);
-#endif
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[0]), addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[1]), addr1);
+		/* flush descriptors */
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[0]), reg0);
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[1]), reg1);
 	}
 }
 
-#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
 #ifdef __AVX2__
-/* AVX2 version for 16-byte descriptors, handles 4 buffers at a time */
-static __rte_always_inline void
-_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq)
+static __rte_always_inline __m256i
+_ci_rxq_rearm_desc_avx2(const __m128i vaddr0, const __m128i vaddr1)
 {
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
-	const __m256i hdroom = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+	const __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
 	const __m256i zero = _mm256_setzero_si256();
+
+	/* merge by casting 0 to 256-bit and inserting 1 into the high lanes */
+	__m256i reg = _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0), vaddr1, 1);
+
+	/* add headroom to address values */
+	reg = _mm256_add_epi64(reg, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+	/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+			offsetof(struct rte_mbuf, buf_addr) + 8);
+	/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+	reg = _mm256_unpackhi_epi64(reg, zero);
+#else
+	/* erase Header Buffer Address */
+	reg = _mm256_unpacklo_epi64(reg, zero);
+#endif
+	return reg;
+}
+
+static __rte_always_inline void
+_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	/* how many descriptors can fit into a register */
+	const uint8_t desc_per_reg = sizeof(__m256i) / sizeof(union ci_rx_desc);
+	/* how many descriptors can fit into one loop iteration */
+	const uint8_t desc_per_iter = desc_per_reg * 2;
 	volatile union ci_rx_desc *rxdp;
 	int i;
 
-	RTE_BUILD_BUG_ON(sizeof(union ci_rx_desc) != 16);
-
 	rxdp = &rxq->rx_ring[rxq->rxrearm_start];
 
-	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < rearm_thresh; i += 4, rxp += 4, rxdp += 4) {
-		struct rte_mbuf *mb0 = rxp[0].mbuf;
-		struct rte_mbuf *mb1 = rxp[1].mbuf;
-		struct rte_mbuf *mb2 = rxp[2].mbuf;
-		struct rte_mbuf *mb3 = rxp[3].mbuf;
+	/* Initialize the mbufs in vector, process 2 or 4 mbufs in one loop */
+	for (i = 0; i < rearm_thresh;
+			i += desc_per_iter,
+			rxp += desc_per_iter,
+			rxdp += desc_per_iter) {
+		__m256i reg0, reg1;
 
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		const __m128i vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		const __m128i vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		const __m128i vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		const __m128i vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+		if (desc_per_iter == 2) {
+			/* 16 byte descriptor, 16 byte zero, times two */
+			const __m128i zero = _mm_setzero_si128();
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
 
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3
-		 */
-		const __m256i vaddr0_256 = _mm256_castsi128_si256(vaddr0);
-		const __m256i vaddr2_256 = _mm256_castsi128_si256(vaddr2);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
 
-		__m256i addr0_1 = _mm256_inserti128_si256(vaddr0_256, vaddr1, 1);
-		__m256i addr2_3 = _mm256_inserti128_si256(vaddr2_256, vaddr3, 1);
+			reg0 = _ci_rxq_rearm_desc_avx2(vaddr0, zero);
+			reg1 = _ci_rxq_rearm_desc_avx2(vaddr1, zero);
+		} else {
+			/* 16 byte descriptor times four */
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
+			const struct rte_mbuf *mb2 = rxp[2].mbuf;
+			const struct rte_mbuf *mb3 = rxp[3].mbuf;
 
-		/* add headroom to address values */
-		addr0_1 = _mm256_add_epi64(addr0_1, hdroom);
-		addr0_1 = _mm256_add_epi64(addr0_1, hdroom);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+			const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+			const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
 
-#if RTE_IOVA_IN_MBUF
-		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
-		addr0_1 = _mm256_unpackhi_epi64(addr0_1, zero);
-		addr2_3 = _mm256_unpackhi_epi64(addr2_3, zero);
-#else
-		/* erase Header Buffer Address */
-		addr0_1 = _mm256_unpacklo_epi64(addr0_1, zero);
-		addr2_3 = _mm256_unpacklo_epi64(addr2_3, zero);
-#endif
+			reg0 = _ci_rxq_rearm_desc_avx2(vaddr0, vaddr1);
+			reg1 = _ci_rxq_rearm_desc_avx2(vaddr2, vaddr3);
+		}
 
-		/* flush desc with pa dma_addr */
-		_mm256_store_si256(RTE_CAST_PTR(__m256i *, &rxdp[0]), addr0_1);
-		_mm256_store_si256(RTE_CAST_PTR(__m256i *, &rxdp[2]), addr2_3);
+		/* flush descriptors */
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, &rxdp[0]), reg0);
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, &rxdp[desc_per_reg]), reg1);
 	}
 }
 #endif /* __AVX2__ */
 
 #ifdef __AVX512VL__
-/* AVX512 version for 16-byte descriptors, handles 8 buffers at a time */
-static __rte_always_inline void
-_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq)
+static __rte_always_inline __m512i
+_ci_rxq_rearm_desc_avx512(const __m128i vaddr0, const __m128i vaddr1,
+		const __m128i vaddr2, const __m128i vaddr3)
 {
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
-	const __m512i hdroom = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
 	const __m512i zero = _mm512_setzero_si512();
+	const __m512i hdroom = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+
+	/**
+	 * merge 0 & 1, by casting 0 to 256-bit and inserting 1 into the high
+	 * lanes. Similarly for 2 & 3.
+	 */
+	const __m256i vaddr0_1 = _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0), vaddr1, 1);
+	const __m256i vaddr2_3 = _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2), vaddr3, 1);
+	/*
+	 * merge 0+1 & 2+3, by casting 0+1 to 512-bit and inserting 2+3 into the
+	 * high lanes.
+	 */
+	__m512i reg = _mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1), vaddr2_3, 1);
+
+	/* add headroom to address values */
+	reg = _mm512_add_epi64(reg, hdroom);
+
+#if RTE_IOVA_IN_MBUF
+	/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+			offsetof(struct rte_mbuf, buf_addr) + 8);
+	/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+	reg = _mm512_unpackhi_epi64(reg, zero);
+#else
+	/* erase Header Buffer Address */
+	reg = _mm512_unpacklo_epi64(reg, zero);
+#endif
+	return reg;
+}
+
+static __rte_always_inline void
+_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	/* how many descriptors can fit into a register */
+	const uint8_t desc_per_reg = sizeof(__m512i) / sizeof(union ci_rx_desc);
+	/* how many descriptors can fit into one loop iteration */
+	const uint8_t desc_per_iter = desc_per_reg * 2;
 	volatile union ci_rx_desc *rxdp;
 	int i;
 
-	RTE_BUILD_BUG_ON(sizeof(union ci_rx_desc) != 16);
-
 	rxdp = &rxq->rx_ring[rxq->rxrearm_start];
 
-	/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-	for (i = 0; i < rearm_thresh; i += 8, rxp += 8, rxdp += 8) {
-		struct rte_mbuf *mb0 = rxp[0].mbuf;
-		struct rte_mbuf *mb1 = rxp[1].mbuf;
-		struct rte_mbuf *mb2 = rxp[2].mbuf;
-		struct rte_mbuf *mb3 = rxp[3].mbuf;
-		struct rte_mbuf *mb4 = rxp[4].mbuf;
-		struct rte_mbuf *mb5 = rxp[5].mbuf;
-		struct rte_mbuf *mb6 = rxp[6].mbuf;
-		struct rte_mbuf *mb7 = rxp[7].mbuf;
+	/* Initialize the mbufs in vector, process 4 or 8 mbufs in one loop */
+	for (i = 0; i < rearm_thresh;
+			i += desc_per_iter,
+			rxp += desc_per_iter,
+			rxdp += desc_per_iter) {
+		__m512i reg0, reg1;
 
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		const __m128i vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		const __m128i vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		const __m128i vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		const __m128i vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-		const __m128i vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-		const __m128i vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-		const __m128i vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-		const __m128i vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+		if (desc_per_iter == 4) {
+			/* 16-byte descriptor, 16-byte zero, times four */
+			const __m128i zero = _mm_setzero_si128();
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
+			const struct rte_mbuf *mb2 = rxp[2].mbuf;
+			const struct rte_mbuf *mb3 = rxp[3].mbuf;
 
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3, and so on.
-		 */
-		const __m256i addr0_256 = _mm256_castsi128_si256(vaddr0);
-		const __m256i addr2_256 = _mm256_castsi128_si256(vaddr2);
-		const __m256i addr4_256 = _mm256_castsi128_si256(vaddr4);
-		const __m256i addr6_256 = _mm256_castsi128_si256(vaddr6);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+			const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+			const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
 
-		const __m256i addr0_1 = _mm256_inserti128_si256(addr0_256, vaddr1, 1);
-		const __m256i addr2_3 = _mm256_inserti128_si256(addr2_256, vaddr3, 1);
-		const __m256i addr4_5 = _mm256_inserti128_si256(addr4_256, vaddr5, 1);
-		const __m256i addr6_7 = _mm256_inserti128_si256(addr6_256, vaddr7, 1);
+			reg0 = _ci_rxq_rearm_desc_avx512(vaddr0, zero, vaddr1, zero);
+			reg1 = _ci_rxq_rearm_desc_avx512(vaddr2, zero, vaddr3, zero);
+		} else {
+			/* 16-byte descriptor times eight */
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
+			const struct rte_mbuf *mb2 = rxp[2].mbuf;
+			const struct rte_mbuf *mb3 = rxp[3].mbuf;
+			const struct rte_mbuf *mb4 = rxp[4].mbuf;
+			const struct rte_mbuf *mb5 = rxp[5].mbuf;
+			const struct rte_mbuf *mb6 = rxp[6].mbuf;
+			const struct rte_mbuf *mb7 = rxp[7].mbuf;
 
-		/**
-		 * merge 0_1 & 2_3, by casting 0_1 to 512-bit and inserting 2_3
-		 * into the high lanes. Similarly for 4_5 & 6_7, and so on.
-		 */
-		const __m512i addr0_1_512 = _mm512_castsi256_si512(addr0_1);
-		const __m512i addr4_5_512 = _mm512_castsi256_si512(addr4_5);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+			const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+			const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
+			const __m128i vaddr4 = _mm_loadu_si128((const __m128i *)&mb4->buf_addr);
+			const __m128i vaddr5 = _mm_loadu_si128((const __m128i *)&mb5->buf_addr);
+			const __m128i vaddr6 = _mm_loadu_si128((const __m128i *)&mb6->buf_addr);
+			const __m128i vaddr7 = _mm_loadu_si128((const __m128i *)&mb7->buf_addr);
 
-		__m512i addr0_3 = _mm512_inserti64x4(addr0_1_512, addr2_3, 1);
-		__m512i addr4_7 = _mm512_inserti64x4(addr4_5_512, addr6_7, 1);
-
-		/* add headroom to address values */
-		addr0_3 = _mm512_add_epi64(addr0_3, hdroom);
-		addr4_7 = _mm512_add_epi64(addr4_7, hdroom);
-
-#if RTE_IOVA_IN_MBUF
-		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
-		addr0_3 = _mm512_unpackhi_epi64(addr0_3, zero);
-		addr4_7 = _mm512_unpackhi_epi64(addr4_7, zero);
-#else
-		/* erase Header Buffer Address */
-		addr0_3 = _mm512_unpacklo_epi64(addr0_3, zero);
-		addr4_7 = _mm512_unpacklo_epi64(addr4_7, zero);
-#endif
+			reg0 = _ci_rxq_rearm_desc_avx512(vaddr0, vaddr1, vaddr2, vaddr3);
+			reg1 = _ci_rxq_rearm_desc_avx512(vaddr4, vaddr5, vaddr6, vaddr7);
+		}
 
 		/* flush desc with pa dma_addr */
-		_mm512_store_si512(RTE_CAST_PTR(__m512i *, &rxdp[0]), addr0_3);
-		_mm512_store_si512(RTE_CAST_PTR(__m512i *, &rxdp[4]), addr4_7);
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, &rxdp[0]), reg0);
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, &rxdp[4]), reg1);
 	}
 }
 #endif /* __AVX512VL__ */
-#endif /* RTE_NET_INTEL_USE_16BYTE_DESC */
 
 static __rte_always_inline void
 ci_rxq_rearm(struct ci_rx_queue *rxq, const enum ci_rx_vec_level vec_level)
@@ -254,7 +292,6 @@ ci_rxq_rearm(struct ci_rx_queue *rxq, const enum ci_rx_vec_level vec_level)
 	if (_ci_rxq_rearm_get_bufs(rxq) < 0)
 		return;
 
-#ifdef RTE_NET_INTEL_USE_16BYTE_DESC
 	switch (vec_level) {
 	case CI_RX_VEC_LEVEL_AVX512:
 #ifdef __AVX512VL__
@@ -272,20 +309,10 @@ ci_rxq_rearm(struct ci_rx_queue *rxq, const enum ci_rx_vec_level vec_level)
 		/* fall back to SSE */
 		/* fall through */
 #endif
-	case CI_RX_VEC_LEVEL_SSE:
-		_ci_rxq_rearm_sse(rxq, desc_len);
-		break;
-	}
-#else
-	/* for 32-byte descriptors only support SSE */
-	switch (vec_level) {
-	case CI_RX_VEC_LEVEL_AVX512:
-	case CI_RX_VEC_LEVEL_AVX2:
 	case CI_RX_VEC_LEVEL_SSE:
 		_ci_rxq_rearm_sse(rxq);
 		break;
 	}
-#endif /* RTE_NET_INTEL_USE_16BYTE_DESC */
 
 	rxq->rxrearm_start += rearm_thresh;
 	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 24/25] net/intel: add common Rx mbuf recycle
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (22 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 23/25] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  2025-05-30 13:57   ` [PATCH v4 25/25] net/intel: add common Tx " Anatoly Burakov
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes, Vladimir Medvedkin

Currently, there are duplicate implementations of Rx mbuf recycle in some
drivers, specifically ixgbe and i40e. Move them into a common header.

While we're at it, also add support for the no-IOVA-in-mbuf case.
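
To illustrate what the no-IOVA-in-mbuf support amounts to (an editor's sketch
mirroring the address selection done by the common helper in the diff below,
not an addition to the patch itself), the descriptor address is chosen per
mbuf at compile time:

	/* sketch: per-mbuf address selection in the common Rx recycle helper */
	#if RTE_IOVA_IN_MBUF
		/* hardware is given the buffer's IO address */
		const uint64_t dma_addr =
			rte_cpu_to_le_64(mb->buf_iova + RTE_PKTMBUF_HEADROOM);
	#else
		/* no IOVA field in the mbuf: use the virtual address directly */
		const uint64_t dma_addr =
			rte_cpu_to_le_64((uintptr_t)mb->buf_addr + RTE_PKTMBUF_HEADROOM);
	#endif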

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/recycle_mbufs.h      | 68 +++++++++++++++++++
 .../i40e/i40e_recycle_mbufs_vec_common.c      | 37 +---------
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    | 35 +---------
 3 files changed, 74 insertions(+), 66 deletions(-)
 create mode 100644 drivers/net/intel/common/recycle_mbufs.h

diff --git a/drivers/net/intel/common/recycle_mbufs.h b/drivers/net/intel/common/recycle_mbufs.h
new file mode 100644
index 0000000000..c32e2ce9b1
--- /dev/null
+++ b/drivers/net/intel/common/recycle_mbufs.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2025 Intel Corporation
+ */
+
+#ifndef _COMMON_INTEL_RECYCLE_MBUFS_H_
+#define _COMMON_INTEL_RECYCLE_MBUFS_H_
+
+#include <stdint.h>
+#include <unistd.h>
+
+#include <rte_mbuf.h>
+#include <rte_io.h>
+#include <ethdev_driver.h>
+
+#include "rx.h"
+#include "tx.h"
+
+/**
+ * Recycle mbufs for Rx queue.
+ *
+ * @param rxq Rx queue pointer
+ * @param nb_mbufs number of mbufs to recycle
+ */
+static __rte_always_inline void
+ci_rx_recycle_mbufs(struct ci_rx_queue *rxq, const uint16_t nb_mbufs)
+{
+	struct ci_rx_entry *rxep;
+	volatile union ci_rx_desc *rxdp;
+	uint16_t rx_id;
+	uint16_t i;
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	for (i = 0; i < nb_mbufs; i++) {
+		struct rte_mbuf *mb = rxep[i].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		const uint64_t paddr = mb->buf_iova + RTE_PKTMBUF_HEADROOM;
+		const uint64_t dma_addr = rte_cpu_to_le_64(paddr);
+#else
+		const uint64_t vaddr = (uintptr_t)mb->buf_addr + RTE_PKTMBUF_HEADROOM;
+		const uint64_t dma_addr = rte_cpu_to_le_64(vaddr);
+#endif
+
+		rxdp[i].read.hdr_addr = 0;
+		rxdp[i].read.pkt_addr = dma_addr;
+	}
+
+	/* Update the descriptor initializer index */
+	rxq->rxrearm_start += nb_mbufs;
+	rx_id = rxq->rxrearm_start - 1;
+
+	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+		rxq->rxrearm_start = 0;
+		rx_id = rxq->nb_rx_desc - 1;
+	}
+
+	rxq->rxrearm_nb -= nb_mbufs;
+
+	rte_io_wmb();
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc_relaxed(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif
diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
index 20d9fd7b22..0b036faea9 100644
--- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
@@ -10,43 +10,12 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 
+#include "../common/recycle_mbufs.h"
+
 void
 i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct ci_rx_queue *rxq = rx_queue;
-	struct ci_rx_entry *rxep;
-	volatile union ci_rx_desc *rxdp;
-	uint16_t rx_id;
-	uint64_t paddr;
-	uint64_t dma_addr;
-	uint16_t i;
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-	rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	for (i = 0; i < nb_mbufs; i++) {
-		/* Initialize rxdp descs. */
-		paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr = rte_cpu_to_le_64(paddr);
-		/* flush desc with pa dma_addr */
-		rxdp[i].read.hdr_addr = 0;
-		rxdp[i].read.pkt_addr = dma_addr;
-	}
-
-	/* Update the descriptor initializer index */
-	rxq->rxrearm_start += nb_mbufs;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= nb_mbufs;
-
-	rte_io_wmb();
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	ci_rx_recycle_mbufs(rx_queue, nb_mbufs);
 }
 
 uint16_t
diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index 1df1787c7f..776bb4303f 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -8,41 +8,12 @@
 #include "ixgbe_ethdev.h"
 #include "ixgbe_rxtx.h"
 
+#include "../common/recycle_mbufs.h"
+
 void
 ixgbe_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct ci_rx_queue *rxq = rx_queue;
-	struct ci_rx_entry *rxep;
-	volatile union ixgbe_adv_rx_desc *rxdp;
-	uint16_t rx_id;
-	uint64_t paddr;
-	uint64_t dma_addr;
-	uint16_t i;
-
-	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
-	rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	for (i = 0; i < nb_mbufs; i++) {
-		/* Initialize rxdp descs. */
-		paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr = rte_cpu_to_le_64(paddr);
-		/* Flush descriptors with pa dma_addr */
-		rxdp[i].read.hdr_addr = 0;
-		rxdp[i].read.pkt_addr = dma_addr;
-	}
-
-	/* Update the descriptor initializer index */
-	rxq->rxrearm_start += nb_mbufs;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= nb_mbufs;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			(rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	ci_rx_recycle_mbufs(rx_queue, nb_mbufs);
 }
 
 uint16_t
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

* [PATCH v4 25/25] net/intel: add common Tx mbuf recycle
  2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
                     ` (23 preceding siblings ...)
  2025-05-30 13:57   ` [PATCH v4 24/25] net/intel: add common Rx mbuf recycle Anatoly Burakov
@ 2025-05-30 13:57   ` Anatoly Burakov
  24 siblings, 0 replies; 82+ messages in thread
From: Anatoly Burakov @ 2025-05-30 13:57 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes, Vladimir Medvedkin

Currently, there are duplicate implementations of Tx mbuf recycle in some
drivers, specifically ixgbe and i40e. Move them into a common header.
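
To give a concrete feel for the shared logic (an editor's worked example with
made-up numbers, not part of the patch), the availability check performed by
the common helper in the diff below resolves as follows:

	/* assume mbuf_ring_size = 1024 (so mask = 1023),
	 * refill_head = 1000, receive_tail = 520
	 */
	avail = (1024 - (1000 - 520)) & 1023;	/* = 544 free Rx mbuf ring slots */
	/* recycling proceeds only when nb_tx_free <= tx_free_thresh
	 * and avail > tx_rs_thresh
	 */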

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3 -> v4:
    - Use the common desc_done function to check for DD bit status
    - Add a desc_done implementation for ixgbe
    
    [implementation note]
    We could also have used ixgbe_tx_desc_done() in ixgbe_rxtx.c, but it is
    defined in ixgbe_rxtx_vec_common.h, and including that header there causes
    a clash between two different implementations of ixgbe_tx_free_bufs(), so
    ixgbe_rxtx.c was left alone.
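
    [editor's illustration]
    For context, the desc_done hook passed to ci_tx_recycle_mbufs() is simply
    a driver-specific DD-bit check. The ixgbe variant is added below in
    ixgbe_rxtx_vec_common.h; the i40e counterpart, i40e_tx_desc_done(), is
    presumably equivalent to the descriptor-type check this patch removes from
    the i40e recycle code, roughly:

    	/* sketch of the i40e DD-bit check, inferred from the removed code */
    	static inline int
    	i40e_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
    	{
    		return (txq->i40e_tx_ring[idx].cmd_type_offset_bsz &
    				rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
    			rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE);
    	}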

 drivers/net/intel/common/recycle_mbufs.h      | 105 ++++++++++++++++++
 .../i40e/i40e_recycle_mbufs_vec_common.c      |  90 +--------------
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    |  92 +--------------
 .../net/intel/ixgbe/ixgbe_rxtx_vec_common.h   |   9 ++
 4 files changed, 120 insertions(+), 176 deletions(-)

diff --git a/drivers/net/intel/common/recycle_mbufs.h b/drivers/net/intel/common/recycle_mbufs.h
index c32e2ce9b1..5b5abba918 100644
--- a/drivers/net/intel/common/recycle_mbufs.h
+++ b/drivers/net/intel/common/recycle_mbufs.h
@@ -65,4 +65,109 @@ ci_rx_recycle_mbufs(struct ci_rx_queue *rxq, const uint16_t nb_mbufs)
 	rte_write32_wc_relaxed(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
 }
 
+/**
+ * Recycle buffers on Tx.
+ *
+ * @param txq Tx queue pointer
+ * @param desc_done function to check if the Tx descriptor is done
+ * @param recycle_rxq_info recycling mbuf information
+ *
+ * @return how many buffers were recycled
+ */
+static __rte_always_inline uint16_t
+ci_tx_recycle_mbufs(struct ci_tx_queue *txq, ci_desc_done_fn desc_done,
+	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+	struct ci_tx_entry *txep;
+	struct rte_mbuf **rxep;
+	int i, n;
+	uint16_t nb_recycle_mbufs;
+	uint16_t avail = 0;
+	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
+	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
+	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
+	uint16_t refill_head = *recycle_rxq_info->refill_head;
+	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+
+	/* Get available recycling Rx buffers. */
+	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
+
+	/* Check Tx free thresh and Rx available space. */
+	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
+		return 0;
+
+	if (!desc_done(txq, txq->tx_next_dd)) {
+		/* If the Tx descriptor is not done, we can not recycle
+		 * buffers.
+		 */
+		return 0;
+	}
+
+	n = txq->tx_rs_thresh;
+	nb_recycle_mbufs = n;
+
+	/* Mbuf recycle mode does not support wrapping around the ring buffer.
+	 * There are two cases for this:
+	 *
+	 * case 1: the refill head of the Rx buffer ring needs to be aligned
+	 * with the mbuf ring size. In this case, the number of Tx buffers
+	 * being freed must be equal to refill_requirement.
+	 *
+	 * case 2: the refill head of the Rx buffer ring does not need to be
+	 * aligned with the mbuf ring size. In this case, the refill head
+	 * update must not exceed the Rx mbuf ring size.
+	 */
+	if ((refill_requirement && refill_requirement != n) ||
+		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
+		return 0;
+
+	/* First buffer to free from S/W ring is at index
+	 * tx_next_dd - (tx_rs_thresh-1).
+	 */
+	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+	rxep = recycle_rxq_info->mbuf_ring;
+	rxep += refill_head;
+
+	/* is fast-free enabled in offloads? */
+	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+		/* Avoid txq containing buffers from unexpected mempool. */
+		if (unlikely(recycle_rxq_info->mp
+					!= txep[0].mbuf->pool))
+			return 0;
+
+		/* Directly put mbufs from Tx to Rx. */
+		for (i = 0; i < n; i++)
+			rxep[i] = txep[i].mbuf;
+	} else {
+		for (i = 0; i < n; i++) {
+			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+
+			/* If Tx buffers are not the last reference or from
+			 * unexpected mempool, previous copied buffers are
+			 * considered as invalid.
+			 */
+			if (unlikely(rxep[i] == NULL ||
+				recycle_rxq_info->mp != txep[i].mbuf->pool))
+				nb_recycle_mbufs = 0;
+		}
+		/* If Tx buffers are not the last reference or
+		 * from unexpected mempool, all recycled buffers
+		 * are put into mempool.
+		 */
+		if (nb_recycle_mbufs == 0)
+			for (i = 0; i < n; i++) {
+				if (rxep[i] != NULL)
+					rte_mempool_put(rxep[i]->pool, rxep[i]);
+			}
+	}
+
+	/* Update counters for Tx. */
+	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+	if (txq->tx_next_dd >= txq->nb_tx_desc)
+		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+	return nb_recycle_mbufs;
+}
+
 #endif
diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
index 0b036faea9..5faaff28c4 100644
--- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
@@ -10,6 +10,8 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 
+#include "i40e_rxtx_vec_common.h"
+
 #include "../common/recycle_mbufs.h"
 
 void
@@ -23,92 +25,6 @@ i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
 	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
 	struct ci_tx_queue *txq = tx_queue;
-	struct ci_tx_entry *txep;
-	struct rte_mbuf **rxep;
-	int i, n;
-	uint16_t nb_recycle_mbufs;
-	uint16_t avail = 0;
-	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
-	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
-	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
-	uint16_t refill_head = *recycle_rxq_info->refill_head;
-	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
 
-	/* Get available recycling Rx buffers. */
-	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
-
-	/* Check Tx free thresh and Rx available space. */
-	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
-		return 0;
-
-	/* check DD bits on threshold descriptor */
-	if ((txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
-				rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
-			rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
-		return 0;
-
-	n = txq->tx_rs_thresh;
-	nb_recycle_mbufs = n;
-
-	/* Mbufs recycle mode can only support no ring buffer wrapping around.
-	 * Two case for this:
-	 *
-	 * case 1: The refill head of Rx buffer ring needs to be aligned with
-	 * mbuf ring size. In this case, the number of Tx freeing buffers
-	 * should be equal to refill_requirement.
-	 *
-	 * case 2: The refill head of Rx ring buffer does not need to be aligned
-	 * with mbuf ring size. In this case, the update of refill head can not
-	 * exceed the Rx mbuf ring size.
-	 */
-	if ((refill_requirement && refill_requirement != n) ||
-		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
-		return 0;
-
-	/* First buffer to free from S/W ring is at index
-	 * tx_next_dd - (tx_rs_thresh-1).
-	 */
-	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
-	rxep = recycle_rxq_info->mbuf_ring;
-	rxep += refill_head;
-
-	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
-		/* Avoid txq contains buffers from unexpected mempool. */
-		if (unlikely(recycle_rxq_info->mp
-					!= txep[0].mbuf->pool))
-			return 0;
-
-		/* Directly put mbufs from Tx to Rx. */
-		for (i = 0; i < n; i++)
-			rxep[i] = txep[i].mbuf;
-	} else {
-		for (i = 0; i < n; i++) {
-			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
-
-			/* If Tx buffers are not the last reference or from
-			 * unexpected mempool, previous copied buffers are
-			 * considered as invalid.
-			 */
-			if (unlikely(rxep[i] == NULL ||
-				recycle_rxq_info->mp != txep[i].mbuf->pool))
-				nb_recycle_mbufs = 0;
-		}
-		/* If Tx buffers are not the last reference or
-		 * from unexpected mempool, all recycled buffers
-		 * are put into mempool.
-		 */
-		if (nb_recycle_mbufs == 0)
-			for (i = 0; i < n; i++) {
-				if (rxep[i] != NULL)
-					rte_mempool_put(rxep[i]->pool, rxep[i]);
-			}
-	}
-
-	/* Update counters for Tx. */
-	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
-	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
-	if (txq->tx_next_dd >= txq->nb_tx_desc)
-		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-
-	return nb_recycle_mbufs;
+	return ci_tx_recycle_mbufs(txq, i40e_tx_desc_done, recycle_rxq_info);
 }
diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index 776bb4303f..f0ffc4360e 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -8,6 +8,8 @@
 #include "ixgbe_ethdev.h"
 #include "ixgbe_rxtx.h"
 
+#include "ixgbe_rxtx_vec_common.h"
+
 #include "../common/recycle_mbufs.h"
 
 void
@@ -20,93 +22,5 @@ uint16_t
 ixgbe_recycle_tx_mbufs_reuse_vec(void *tx_queue,
 		struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
-	struct ci_tx_queue *txq = tx_queue;
-	struct ci_tx_entry *txep;
-	struct rte_mbuf **rxep;
-	int i, n;
-	uint32_t status;
-	uint16_t nb_recycle_mbufs;
-	uint16_t avail = 0;
-	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
-	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
-	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
-	uint16_t refill_head = *recycle_rxq_info->refill_head;
-	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
-
-	/* Get available recycling Rx buffers. */
-	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
-
-	/* Check Tx free thresh and Rx available space. */
-	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
-		return 0;
-
-	/* check DD bits on threshold descriptor */
-	status = txq->ixgbe_tx_ring[txq->tx_next_dd].wb.status;
-	if (!(status & IXGBE_ADVTXD_STAT_DD))
-		return 0;
-
-	n = txq->tx_rs_thresh;
-	nb_recycle_mbufs = n;
-
-	/* Mbufs recycle can only support no ring buffer wrapping around.
-	 * Two case for this:
-	 *
-	 * case 1: The refill head of Rx buffer ring needs to be aligned with
-	 * buffer ring size. In this case, the number of Tx freeing buffers
-	 * should be equal to refill_requirement.
-	 *
-	 * case 2: The refill head of Rx ring buffer does not need to be aligned
-	 * with buffer ring size. In this case, the update of refill head can not
-	 * exceed the Rx buffer ring size.
-	 */
-	if ((refill_requirement && refill_requirement != n) ||
-		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
-		return 0;
-
-	/* First buffer to free from S/W ring is at index
-	 * tx_next_dd - (tx_rs_thresh-1).
-	 */
-	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
-	rxep = recycle_rxq_info->mbuf_ring;
-	rxep += refill_head;
-
-	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
-		/* Avoid txq contains buffers from unexpected mempool. */
-		if (unlikely(recycle_rxq_info->mp
-					!= txep[0].mbuf->pool))
-			return 0;
-
-		/* Directly put mbufs from Tx to Rx. */
-		for (i = 0; i < n; i++)
-			rxep[i] = txep[i].mbuf;
-	} else {
-		for (i = 0; i < n; i++) {
-			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
-
-			/* If Tx buffers are not the last reference or from
-			 * unexpected mempool, previous copied buffers are
-			 * considered as invalid.
-			 */
-			if (unlikely(rxep[i] == NULL ||
-				recycle_rxq_info->mp != txep[i].mbuf->pool))
-				nb_recycle_mbufs = 0;
-		}
-		/* If Tx buffers are not the last reference or
-		 * from unexpected mempool, all recycled buffers
-		 * are put into mempool.
-		 */
-		if (nb_recycle_mbufs == 0)
-			for (i = 0; i < n; i++) {
-				if (rxep[i] != NULL)
-					rte_mempool_put(rxep[i]->pool, rxep[i]);
-			}
-	}
-
-	/* Update counters for Tx. */
-	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
-	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
-	if (txq->tx_next_dd >= txq->nb_tx_desc)
-		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-
-	return nb_recycle_mbufs;
+	return ci_tx_recycle_mbufs(tx_queue, ixgbe_tx_desc_done, recycle_rxq_info);
 }
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
index 538a2b5164..2ec7774731 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
@@ -6,11 +6,20 @@
 #define _IXGBE_RXTX_VEC_COMMON_H_
 #include <stdint.h>
 #include <ethdev_driver.h>
+#include <rte_malloc.h>
 
 #include "../common/rx.h"
 #include "ixgbe_ethdev.h"
 #include "ixgbe_rxtx.h"
 
+static inline int
+ixgbe_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
+{
+	const uint32_t status = txq->ixgbe_tx_ring[idx].wb.status;
+
+	return !!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD));
+}
+
 static __rte_always_inline int
 ixgbe_tx_free_bufs(struct ci_tx_queue *txq)
 {
-- 
2.47.1


^ permalink raw reply	[flat|nested] 82+ messages in thread

end of thread, other threads:[~2025-05-30 14:01 UTC | newest]

Thread overview: 82+ messages
2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 04/13] net/i40e: use the " Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 05/13] net/ice: " Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 06/13] net/iavf: " Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 09/13] net/iavf: " Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 10/13] net/ixgbe: " Anatoly Burakov
2025-05-06 13:28 ` [PATCH v1 11/13] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
2025-05-06 13:28 ` [PATCH v1 12/13] net/intel: add common Rx mbuf recycle Anatoly Burakov
2025-05-06 13:28 ` [PATCH v1 13/13] net/intel: add common Tx " Anatoly Burakov
2025-05-12 10:58 ` [PATCH v2 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
2025-05-12 10:58   ` [PATCH v2 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
2025-05-12 10:58   ` [PATCH v2 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
2025-05-12 10:58   ` [PATCH v2 04/13] net/i40e: use the " Anatoly Burakov
2025-05-12 10:58   ` [PATCH v2 05/13] net/ice: " Anatoly Burakov
2025-05-12 10:58   ` [PATCH v2 06/13] net/iavf: " Anatoly Burakov
2025-05-12 10:58   ` [PATCH v2 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
2025-05-12 10:58   ` [PATCH v2 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
2025-05-12 10:58   ` [PATCH v2 09/13] net/iavf: " Anatoly Burakov
2025-05-12 10:58   ` [PATCH v2 10/13] net/ixgbe: " Anatoly Burakov
2025-05-12 10:58   ` [PATCH v2 11/13] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
2025-05-12 10:58   ` [PATCH v2 12/13] net/intel: add common Rx mbuf recycle Anatoly Burakov
2025-05-12 10:58   ` [PATCH v2 13/13] net/intel: add common Tx " Anatoly Burakov
2025-05-12 12:54 ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
2025-05-12 12:54   ` [PATCH v3 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
2025-05-14 16:39     ` Bruce Richardson
2025-05-12 12:54   ` [PATCH v3 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
2025-05-14 16:45     ` Bruce Richardson
2025-05-12 12:54   ` [PATCH v3 04/13] net/i40e: use the " Anatoly Burakov
2025-05-14 16:52     ` Bruce Richardson
2025-05-15 11:09       ` Burakov, Anatoly
2025-05-15 12:55         ` Bruce Richardson
2025-05-12 12:54   ` [PATCH v3 05/13] net/ice: " Anatoly Burakov
2025-05-14 16:56     ` Bruce Richardson
2025-05-23 11:16       ` Burakov, Anatoly
2025-05-12 12:54   ` [PATCH v3 06/13] net/iavf: " Anatoly Burakov
2025-05-15 10:59     ` Bruce Richardson
2025-05-15 11:11       ` Burakov, Anatoly
2025-05-15 12:57         ` Bruce Richardson
2025-05-12 12:54   ` [PATCH v3 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
2025-05-15 10:56     ` Bruce Richardson
2025-05-12 12:54   ` [PATCH v3 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
2025-05-15 10:58     ` Bruce Richardson
2025-05-12 12:54   ` [PATCH v3 09/13] net/iavf: " Anatoly Burakov
2025-05-12 12:54   ` [PATCH v3 10/13] net/ixgbe: " Anatoly Burakov
2025-05-12 12:54   ` [PATCH v3 11/13] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
2025-05-12 12:54   ` [PATCH v3 12/13] net/intel: add common Rx mbuf recycle Anatoly Burakov
2025-05-12 12:54   ` [PATCH v3 13/13] net/intel: add common Tx " Anatoly Burakov
2025-05-15 11:07     ` Bruce Richardson
2025-05-12 12:58   ` [PATCH v3 01/13] net/ixgbe: remove unused field in Rx queue struct Bruce Richardson
2025-05-14 16:32   ` Bruce Richardson
2025-05-15 11:15     ` Burakov, Anatoly
2025-05-15 12:58       ` Bruce Richardson
2025-05-30 13:56 ` [PATCH v4 00/25] Intel PMD drivers Rx cleanup Anatoly Burakov
2025-05-30 13:56   ` [PATCH v4 01/25] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
2025-05-30 13:56   ` [PATCH v4 02/25] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
2025-05-30 13:56   ` [PATCH v4 03/25] net/ixgbe: match variable names to other drivers Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 04/25] net/i40e: match variable name " Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 05/25] net/ice: " Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 06/25] net/i40e: rename 16-byte descriptor define Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 07/25] net/ice: " Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 08/25] net/iavf: " Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 09/25] net/ixgbe: simplify vector PMD compilation Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 10/25] net/ixgbe: replace always-true check Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 11/25] net/ixgbe: clean up definitions Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 12/25] net/i40e: " Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 13/25] net/ice: " Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 14/25] net/iavf: " Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 15/25] net/ixgbe: create common Rx queue structure Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 16/25] net/i40e: use the " Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 17/25] net/ice: " Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 18/25] net/iavf: " Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 19/25] net/intel: generalize vectorized Rx rearm Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 20/25] net/i40e: use common Rx rearm code Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 21/25] net/iavf: " Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 22/25] net/ixgbe: " Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 23/25] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 24/25] net/intel: add common Rx mbuf recycle Anatoly Burakov
2025-05-30 13:57   ` [PATCH v4 25/25] net/intel: add common Tx " Anatoly Burakov
