DPDK patches and discussions
* [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct
@ 2025-05-06 13:27 Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
                   ` (11 more replies)
  0 siblings, 12 replies; 13+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin; +Cc: bruce.richardson

The `rdh` (read head) field in the `ixgbe_rx_queue` struct is not used
anywhere in the codebase and can be removed.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/ixgbe/ixgbe_rxtx.c | 9 ++-------
 drivers/net/intel/ixgbe/ixgbe_rxtx.h | 1 -
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index 95c80ac1b8..0c07ce3186 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -3296,17 +3296,12 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	/*
 	 * Modified to setup VFRDT for Virtual Function
 	 */
-	if (ixgbe_is_vf(dev)) {
+	if (ixgbe_is_vf(dev))
 		rxq->rdt_reg_addr =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
-		rxq->rdh_reg_addr =
-			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
-	} else {
+	else
 		rxq->rdt_reg_addr =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
-		rxq->rdh_reg_addr =
-			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
-	}
 
 	rxq->rx_ring_phys_addr = rz->iova;
 	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index 641f982b01..20a5c5a0af 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -85,7 +85,6 @@ struct ixgbe_rx_queue {
 	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
 	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
 	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
-	volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
 	struct ixgbe_rx_entry *sw_ring; /**< address of RX software ring. */
 	struct ixgbe_scattered_rx_entry *sw_sc_ring; /**< address of scattered Rx software ring. */
 	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
-- 
2.47.1



* [PATCH v1 02/13] net/iavf: make IPsec stats dynamically allocated
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin, Ian Stokes; +Cc: bruce.richardson

Currently, the stats structure is embedded directly in the queue structure.
We're about to move the iavf driver to a common Rx queue structure, which
cannot contain driver-specific structures by value, only by pointer. To
prepare for that, replace the embedded stats structure with a pointer to a
dynamically allocated one.
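
As a minimal sketch (illustration only, not part of the patch; the names are
taken from the hunks below), the embedded stats field becomes a pointer that
is allocated at queue setup and freed at queue release:

    /* queue struct member: was "struct iavf_rx_queue_stats stats;" */
    struct iavf_rx_queue_stats *stats;

    /* at queue setup */
    rxq->stats = rte_zmalloc_socket("iavf rxq stats",
                                    sizeof(struct iavf_rx_queue_stats),
                                    RTE_CACHE_LINE_SIZE, socket_id);
    if (rxq->stats == NULL) {
            rte_free(rxq);
            return -ENOMEM;
    }

    /* at queue release */
    rte_free(q->stats);
    rte_free(q);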

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/iavf/iavf_ethdev.c |  2 +-
 drivers/net/intel/iavf/iavf_rxtx.c   | 21 ++++++++++++++++++---
 drivers/net/intel/iavf/iavf_rxtx.h   |  2 +-
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/net/intel/iavf/iavf_ethdev.c b/drivers/net/intel/iavf/iavf_ethdev.c
index b3dacbef84..5babd587b3 100644
--- a/drivers/net/intel/iavf/iavf_ethdev.c
+++ b/drivers/net/intel/iavf/iavf_ethdev.c
@@ -1870,7 +1870,7 @@ iavf_dev_update_ipsec_xstats(struct rte_eth_dev *ethdev,
 		struct iavf_rx_queue *rxq;
 		struct iavf_ipsec_crypto_stats *stats;
 		rxq = (struct iavf_rx_queue *)ethdev->data->rx_queues[idx];
-		stats = &rxq->stats.ipsec_crypto;
+		stats = &rxq->stats->ipsec_crypto;
 		ips->icount += stats->icount;
 		ips->ibytes += stats->ibytes;
 		ips->ierrors.count += stats->ierrors.count;
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index 5411eb6897..d23d2df807 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -619,6 +619,18 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 		return -ENOMEM;
 	}
 
+	/* Allocate stats */
+	rxq->stats = rte_zmalloc_socket("iavf rxq stats",
+				 sizeof(struct iavf_rx_queue_stats),
+				 RTE_CACHE_LINE_SIZE,
+				 socket_id);
+	if (!rxq->stats) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for "
+			     "rx queue stats");
+		rte_free(rxq);
+		return -ENOMEM;
+	}
+
 	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
 		proto_xtr = vf->proto_xtr ? vf->proto_xtr[queue_idx] :
 				IAVF_PROTO_XTR_NONE;
@@ -677,6 +689,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 				   socket_id);
 	if (!rxq->sw_ring) {
 		PMD_INIT_LOG(ERR, "Failed to allocate memory for SW ring");
+		rte_free(rxq->stats);
 		rte_free(rxq);
 		return -ENOMEM;
 	}
@@ -693,6 +706,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	if (!mz) {
 		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for RX");
 		rte_free(rxq->sw_ring);
+		rte_free(rxq->stats);
 		rte_free(rxq);
 		return -ENOMEM;
 	}
@@ -1054,6 +1068,7 @@ iavf_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 	iavf_rxq_release_mbufs_ops[q->rel_mbufs_type].release_mbufs(q);
 	rte_free(q->sw_ring);
 	rte_memzone_free(q->mz);
+	rte_free(q->stats);
 	rte_free(q);
 }
 
@@ -1581,7 +1596,7 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
 		iavf_flex_rxd_to_vlan_tci(rxm, &rxd);
 		iavf_flex_rxd_to_ipsec_crypto_status(rxm, &rxd,
-				&rxq->stats.ipsec_crypto);
+				&rxq->stats->ipsec_crypto);
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, rxm, &rxd);
 		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
@@ -1750,7 +1765,7 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
 		iavf_flex_rxd_to_vlan_tci(first_seg, &rxd);
 		iavf_flex_rxd_to_ipsec_crypto_status(first_seg, &rxd,
-				&rxq->stats.ipsec_crypto);
+				&rxq->stats->ipsec_crypto);
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, first_seg, &rxd);
 		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
@@ -2034,7 +2049,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 				rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
 			iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]);
 			iavf_flex_rxd_to_ipsec_crypto_status(mb, &rxdp[j],
-				&rxq->stats.ipsec_crypto);
+				&rxq->stats->ipsec_crypto);
 			rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);
 			stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
 			pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index 0b5d67e718..62b5a67c84 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -268,7 +268,7 @@ struct iavf_rx_queue {
 	uint8_t proto_xtr; /* protocol extraction type */
 	uint64_t xtr_ol_flag;
 		/* flexible descriptor metadata extraction offload flag */
-	struct iavf_rx_queue_stats stats;
+	struct iavf_rx_queue_stats *stats;
 	uint64_t offloads;
 	uint64_t phc_time;
 	uint64_t hw_time_update;
-- 
2.47.1



* [PATCH v1 03/13] net/ixgbe: create common Rx queue structure
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 04/13] net/i40e: use the " Anatoly Burakov
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Bruce Richardson, Vladimir Medvedkin

In preparation for the deduplication effort, generalize the Rx queue structure.

Most of the fields are simply moved to common/rx.h, with comments clarified
where necessary. In some instances, fields are renamed during the move to make
them more consistent with the rest of the codebase.

Specifically, the following fields are renamed:

- rdt_reg_addr -> qrx_tail (Rx ring tail register address)
- rx_using_sse -> vector_rx (indicates if vectorized path is enabled)
- mb_pool -> mp (other drivers use this name)

Additionally, some per-driver defines are moved to the aforementioned common
Rx header and re-defined in the driver in terms of the common values.
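
To illustrate the renames (not part of the patch; these fragments mirror the
hunks that follow), driver code that used the old field names now reads:

    struct ci_rx_queue *rxq = rx_queue;

    rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;  /* rx_ring -> per-driver union member */
    nmb = rte_mbuf_raw_alloc(rxq->mp);               /* mb_pool -> mp */
    IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);       /* rdt_reg_addr -> qrx_tail */
    if (rxq->vector_rx)                              /* rx_using_sse -> vector_rx */
            ixgbe_rx_queue_release_mbufs_vec(rxq);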

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h                 |  62 ++++++++
 drivers/net/intel/ixgbe/ixgbe_ethdev.c        |   8 +-
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    |   8 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx.c          | 149 +++++++++---------
 drivers/net/intel/ixgbe/ixgbe_rxtx.h          |  67 +-------
 .../net/intel/ixgbe/ixgbe_rxtx_vec_common.h   |   4 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c |  22 +--
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c  |  22 +--
 8 files changed, 172 insertions(+), 170 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index abb01ba5e7..524de39f9c 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -11,6 +11,68 @@
 #include <rte_ethdev.h>
 
 #define CI_RX_BURST 32
+#define CI_RX_MAX_BURST 32
+
+struct ci_rx_queue;
+
+struct ci_rx_entry {
+	struct rte_mbuf *mbuf; /* mbuf associated with RX descriptor. */
+};
+
+struct ci_rx_entry_sc {
+	struct rte_mbuf *fbuf; /* First segment of the fragmented packet.*/
+};
+
+/**
+ * Structure associated with each RX queue.
+ */
+struct ci_rx_queue {
+	struct rte_mempool  *mp; /**< mbuf pool to populate RX ring. */
+	union { /* RX ring virtual address */
+		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
+	};
+	volatile uint8_t *qrx_tail;   /**< register address of tail */
+	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
+	struct ci_rx_entry_sc *sw_sc_ring; /**< address of scattered Rx software ring. */
+	rte_iova_t rx_ring_phys_addr; /**< RX ring DMA address. */
+	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
+	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
+	/** hold packets to return to application */
+	struct rte_mbuf *rx_stage[CI_RX_MAX_BURST * 2];
+	uint16_t nb_rx_desc; /**< number of RX descriptors. */
+	uint16_t rx_tail;  /**< current value of tail register. */
+	uint16_t rx_nb_avail; /**< nr of staged pkts ready to ret to app */
+	uint16_t nb_rx_hold; /**< number of held free RX desc. */
+	uint16_t rx_next_avail; /**< idx of next staged pkt to ret to app */
+	uint16_t rx_free_thresh; /**< max free RX desc to hold. */
+	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
+	uint16_t rxrearm_nb;     /**< number of remaining to be re-armed */
+	uint16_t rxrearm_start;  /**< the idx we start the re-arming from */
+	uint16_t queue_id; /**< RX queue index. */
+	uint16_t port_id;  /**< Device port identifier. */
+	uint16_t reg_idx;  /**< RX queue register index. */
+	uint8_t crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
+	bool rx_deferred_start; /**< queue is not started on dev start. */
+	bool vector_rx; /**< indicates that vector RX is in use */
+	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
+	uint64_t mbuf_initializer; /**< value to init mbufs */
+	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
+	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
+	struct rte_mbuf fake_mbuf;
+	const struct rte_memzone *mz;
+	union {
+		struct { /* ixgbe specific values */
+			/** indicates that IPsec RX feature is in use */
+			uint8_t using_ipsec;
+			/** Packet type mask for different NICs. */
+			uint16_t pkt_type_mask;
+			/** UDP frames with a 0 checksum can be marked as checksum errors. */
+			uint8_t rx_udp_csum_zero_err;
+			/** flags to set in mbuf when a vlan is detected. */
+			uint64_t vlan_flags;
+		};
+	};
+};
 
 static inline uint16_t
 ci_rx_reassemble_packets(struct rte_mbuf **rx_bufs, uint16_t nb_bufs, uint8_t *split_flags,
diff --git a/drivers/net/intel/ixgbe/ixgbe_ethdev.c b/drivers/net/intel/ixgbe/ixgbe_ethdev.c
index f1fd271a0a..df1eecc3c1 100644
--- a/drivers/net/intel/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/intel/ixgbe/ixgbe_ethdev.c
@@ -2022,7 +2022,7 @@ ixgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev, uint16_t queue, bool on)
 {
 	struct ixgbe_hwstrip *hwstrip =
 		IXGBE_DEV_PRIVATE_TO_HWSTRIP_BITMAP(dev->data->dev_private);
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	if (queue >= IXGBE_MAX_RX_QUEUE_NUM)
 		return;
@@ -2157,7 +2157,7 @@ ixgbe_vlan_hw_strip_config(struct rte_eth_dev *dev)
 	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
 	uint32_t ctrl;
 	uint16_t i;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	bool on;
 
 	PMD_INIT_FUNC_TRACE();
@@ -2200,7 +2200,7 @@ ixgbe_config_vlan_strip_on_all_queues(struct rte_eth_dev *dev, int mask)
 {
 	uint16_t i;
 	struct rte_eth_rxmode *rxmode;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	if (mask & RTE_ETH_VLAN_STRIP_MASK) {
 		rxmode = &dev->data->dev_conf.rxmode;
@@ -5789,7 +5789,7 @@ ixgbevf_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on)
 static int
 ixgbevf_vlan_offload_config(struct rte_eth_dev *dev, int mask)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t i;
 	int on = 0;
 
diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index c1b086ef6d..1df1787c7f 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -11,15 +11,15 @@
 void
 ixgbe_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
-	struct ixgbe_rx_entry *rxep;
+	struct ci_rx_queue *rxq = rx_queue;
+	struct ci_rx_entry *rxep;
 	volatile union ixgbe_adv_rx_desc *rxdp;
 	uint16_t rx_id;
 	uint64_t paddr;
 	uint64_t dma_addr;
 	uint16_t i;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
 	rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
 	for (i = 0; i < nb_mbufs; i++) {
@@ -42,7 +42,7 @@ ixgbe_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 			(rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
+	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
 }
 
 uint16_t
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index 0c07ce3186..4e4afd81e4 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -1423,11 +1423,11 @@ int
 ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = &rxq->ixgbe_rx_ring[desc];
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.upper.status_error;
 
@@ -1567,10 +1567,10 @@ rx_desc_error_to_pkt_flags(uint32_t rx_status, uint16_t pkt_info,
 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
 #endif
 static inline int
-ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_scan_hw_ring(struct ci_rx_queue *rxq)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t pkt_len;
 	uint64_t pkt_flags;
@@ -1582,7 +1582,7 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
 	uint64_t vlan_flags = rxq->vlan_flags;
 
 	/* get references to current descriptor and S/W ring entry */
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = &rxq->ixgbe_rx_ring[rxq->rx_tail];
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	status = rxdp->wb.upper.status_error;
@@ -1667,10 +1667,10 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
 }
 
 static inline int
-ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
+ixgbe_rx_alloc_bufs(struct ci_rx_queue *rxq, bool reset_mbuf)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx;
 	__le64 dma_addr;
@@ -1679,12 +1679,12 @@ ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
 	/* allocate buffers in bulk directly into the S/W ring */
 	alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
 	rxep = &rxq->sw_ring[alloc_idx];
-	diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
+	diag = rte_mempool_get_bulk(rxq->mp, (void *)rxep,
 				    rxq->rx_free_thresh);
 	if (unlikely(diag != 0))
 		return -ENOMEM;
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = &rxq->ixgbe_rx_ring[alloc_idx];
 	for (i = 0; i < rxq->rx_free_thresh; ++i) {
 		/* populate the static rte mbuf fields */
 		mb = rxep[i].mbuf;
@@ -1711,7 +1711,7 @@ ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
 }
 
 static inline uint16_t
-ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+ixgbe_rx_fill_from_stage(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			 uint16_t nb_pkts)
 {
 	struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
@@ -1735,7 +1735,7 @@ static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	     uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	uint16_t nb_rx = 0;
 
 	/* Any previously recv'd pkts will be returned from the Rx stage */
@@ -1778,8 +1778,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 		/* update tail pointer */
 		rte_wmb();
-		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
-					    cur_free_trigger);
+		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->qrx_tail, cur_free_trigger);
 	}
 
 	if (rxq->rx_tail >= rxq->nb_rx_desc)
@@ -1825,11 +1824,11 @@ uint16_t
 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	volatile union ixgbe_adv_rx_desc *rx_ring;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *sw_ring;
-	struct ixgbe_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
 	union ixgbe_adv_rx_desc rxd;
@@ -1847,7 +1846,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
+	rx_ring = rxq->ixgbe_rx_ring;
 	sw_ring = rxq->sw_ring;
 	vlan_flags = rxq->vlan_flags;
 	while (nb_rx < nb_pkts) {
@@ -1908,7 +1907,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   (unsigned) rx_id, (unsigned) staterr,
 			   (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
 
-		nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (nmb == NULL) {
 			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
 				   "queue_id=%u", (unsigned) rxq->port_id,
@@ -2017,7 +2016,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   (unsigned) nb_rx);
 		rx_id = (uint16_t) ((rx_id == 0) ?
 				     (rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
+		IXGBE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 		nb_hold = 0;
 	}
 	rxq->nb_rx_hold = nb_hold;
@@ -2052,7 +2051,7 @@ static inline void
 ixgbe_fill_cluster_head_buf(
 	struct rte_mbuf *head,
 	union ixgbe_adv_rx_desc *desc,
-	struct ixgbe_rx_queue *rxq,
+	struct ci_rx_queue *rxq,
 	uint32_t staterr)
 {
 	uint32_t pkt_info;
@@ -2114,10 +2113,10 @@ static inline uint16_t
 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 		    bool bulk_alloc)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
-	volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
-	struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
-	struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ixgbe_adv_rx_desc *rx_ring = rxq->ixgbe_rx_ring;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry_sc *sw_sc_ring = rxq->sw_sc_ring;
 	uint16_t rx_id = rxq->rx_tail;
 	uint16_t nb_rx = 0;
 	uint16_t nb_hold = rxq->nb_rx_hold;
@@ -2125,10 +2124,10 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 
 	while (nb_rx < nb_pkts) {
 		bool eop;
-		struct ixgbe_rx_entry *rxe;
-		struct ixgbe_scattered_rx_entry *sc_entry;
-		struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
-		struct ixgbe_rx_entry *next_rxe = NULL;
+		struct ci_rx_entry *rxe;
+		struct ci_rx_entry_sc *sc_entry;
+		struct ci_rx_entry_sc *next_sc_entry = NULL;
+		struct ci_rx_entry *next_rxe = NULL;
 		struct rte_mbuf *first_seg;
 		struct rte_mbuf *rxm;
 		struct rte_mbuf *nmb = NULL;
@@ -2165,7 +2164,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 			   rte_le_to_cpu_16(rxd.wb.upper.length));
 
 		if (!bulk_alloc) {
-			nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
+			nmb = rte_mbuf_raw_alloc(rxq->mp);
 			if (nmb == NULL) {
 				PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
 						  "port_id=%u queue_id=%u",
@@ -2181,7 +2180,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 			if (!ixgbe_rx_alloc_bufs(rxq, false)) {
 				rte_wmb();
 				IXGBE_PCI_REG_WC_WRITE_RELAXED(
-							rxq->rdt_reg_addr,
+							rxq->qrx_tail,
 							next_rdt);
 				nb_hold -= rxq->rx_free_thresh;
 			} else {
@@ -2347,7 +2346,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
 			   rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
 
 		rte_wmb();
-		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
+		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->qrx_tail, prev_id);
 		nb_hold = 0;
 	}
 
@@ -2969,12 +2968,12 @@ ixgbe_free_sc_cluster(struct rte_mbuf *m)
 }
 
 static void __rte_cold
-ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release_mbufs(struct ci_rx_queue *rxq)
 {
 	unsigned i;
 
 	/* SSE Vector driver has a different way of releasing mbufs. */
-	if (rxq->rx_using_sse) {
+	if (rxq->vector_rx) {
 		ixgbe_rx_queue_release_mbufs_vec(rxq);
 		return;
 	}
@@ -3006,7 +3005,7 @@ ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
 }
 
 static void __rte_cold
-ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release(struct ci_rx_queue *rxq)
 {
 	if (rxq != NULL) {
 		ixgbe_rx_queue_release_mbufs(rxq);
@@ -3032,7 +3031,7 @@ ixgbe_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
  *           function must be used.
  */
 static inline int __rte_cold
-check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
+check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
 {
 	int ret = 0;
 
@@ -3069,7 +3068,7 @@ check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
 
 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
 static void __rte_cold
-ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
+ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ci_rx_queue *rxq)
 {
 	static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
 	unsigned i;
@@ -3090,7 +3089,7 @@ ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
 	 * reads extra memory as zeros.
 	 */
 	for (i = 0; i < len; i++) {
-		rxq->rx_ring[i] = zeroed_desc;
+		rxq->ixgbe_rx_ring[i] = zeroed_desc;
 	}
 
 	/*
@@ -3205,7 +3204,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 			 struct rte_mempool *mp)
 {
 	const struct rte_memzone *rz;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ixgbe_hw     *hw;
 	uint16_t len;
 	struct ixgbe_adapter *adapter = dev->data->dev_private;
@@ -3234,11 +3233,11 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	}
 
 	/* First allocate the rx queue data structure */
-	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
+	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE, socket_id);
 	if (rxq == NULL)
 		return -ENOMEM;
-	rxq->mb_pool = mp;
+	rxq->mp = mp;
 	rxq->nb_rx_desc = nb_desc;
 	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
 	rxq->queue_id = queue_idx;
@@ -3297,14 +3296,14 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	 * Modified to setup VFRDT for Virtual Function
 	 */
 	if (ixgbe_is_vf(dev))
-		rxq->rdt_reg_addr =
+		rxq->qrx_tail =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
 	else
-		rxq->rdt_reg_addr =
+		rxq->qrx_tail =
 			IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
 
 	rxq->rx_ring_phys_addr = rz->iova;
-	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
+	rxq->ixgbe_rx_ring = (union ixgbe_adv_rx_desc *)rz->addr;
 
 	/*
 	 * Certain constraints must be met in order to use the bulk buffer
@@ -3329,7 +3328,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 		len += RTE_PMD_IXGBE_RX_MAX_BURST;
 
 	rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
-					  sizeof(struct ixgbe_rx_entry) * len,
+					  sizeof(struct ci_rx_entry) * len,
 					  RTE_CACHE_LINE_SIZE, socket_id);
 	if (!rxq->sw_ring) {
 		ixgbe_rx_queue_release(rxq);
@@ -3346,7 +3345,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	 */
 	rxq->sw_sc_ring =
 		rte_zmalloc_socket("rxq->sw_sc_ring",
-				   sizeof(struct ixgbe_scattered_rx_entry) * len,
+				   sizeof(struct ci_rx_entry_sc) * len,
 				   RTE_CACHE_LINE_SIZE, socket_id);
 	if (!rxq->sw_sc_ring) {
 		ixgbe_rx_queue_release(rxq);
@@ -3355,7 +3354,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
 
 	PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
 			    "dma_addr=0x%"PRIx64,
-		     rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
+		     rxq->sw_ring, rxq->sw_sc_ring, rxq->ixgbe_rx_ring,
 		     rxq->rx_ring_phys_addr);
 
 	if (!rte_is_power_of_2(nb_desc)) {
@@ -3379,11 +3378,11 @@ ixgbe_dev_rx_queue_count(void *rx_queue)
 {
 #define IXGBE_RXQ_SCAN_INTERVAL 4
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &(rxq->rx_ring[rxq->rx_tail]);
+	rxdp = &rxq->ixgbe_rx_ring[rxq->rx_tail];
 
 	while ((desc < rxq->nb_rx_desc) &&
 		(rxdp->wb.upper.status_error &
@@ -3391,7 +3390,7 @@ ixgbe_dev_rx_queue_count(void *rx_queue)
 		desc += IXGBE_RXQ_SCAN_INTERVAL;
 		rxdp += IXGBE_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
+			rxdp = &(rxq->ixgbe_rx_ring[rxq->rx_tail +
 				desc - rxq->nb_rx_desc]);
 	}
 
@@ -3401,7 +3400,7 @@ ixgbe_dev_rx_queue_count(void *rx_queue)
 int
 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile uint32_t *status;
 	uint32_t nb_hold, desc;
 
@@ -3409,7 +3408,7 @@ ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 		return -EINVAL;
 
 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
-	if (rxq->rx_using_sse)
+	if (rxq->vector_rx)
 		nb_hold = rxq->rxrearm_nb;
 	else
 #endif
@@ -3421,7 +3420,7 @@ ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	status = &rxq->rx_ring[desc].wb.upper.status_error;
+	status = &rxq->ixgbe_rx_ring[desc].wb.upper.status_error;
 	if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
 		return RTE_ETH_RX_DESC_DONE;
 
@@ -3506,7 +3505,7 @@ ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
 	}
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 
 		if (rxq != NULL) {
 			ixgbe_rx_queue_release_mbufs(rxq);
@@ -4668,16 +4667,16 @@ ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
 }
 
 static int __rte_cold
-ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
+ixgbe_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq)
 {
-	struct ixgbe_rx_entry *rxe = rxq->sw_ring;
+	struct ci_rx_entry *rxe = rxq->sw_ring;
 	uint64_t dma_addr;
 	unsigned int i;
 
 	/* Initialize software ring entries */
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
 		volatile union ixgbe_adv_rx_desc *rxd;
-		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
+		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mp);
 
 		if (mbuf == NULL) {
 			PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
@@ -4690,7 +4689,7 @@ ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
 
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
-		rxd = &rxq->rx_ring[i];
+		rxd = &rxq->ixgbe_rx_ring[i];
 		rxd->read.hdr_addr = 0;
 		rxd->read.pkt_addr = dma_addr;
 		rxe[i].mbuf = mbuf;
@@ -5109,9 +5108,9 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
 		dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 
-		rxq->rx_using_sse = rx_using_sse;
+		rxq->vector_rx = rx_using_sse;
 #ifdef RTE_LIB_SECURITY
 		rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
 				RTE_ETH_RX_OFFLOAD_SECURITY);
@@ -5187,7 +5186,7 @@ ixgbe_set_rsc(struct rte_eth_dev *dev)
 
 	/* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		uint32_t srrctl =
 			IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
 		uint32_t rscctl =
@@ -5217,7 +5216,7 @@ ixgbe_set_rsc(struct rte_eth_dev *dev)
 		 */
 
 		rscctl |= IXGBE_RSCCTL_RSCEN;
-		rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
+		rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mp);
 		psrtype |= IXGBE_PSRTYPE_TCPHDR;
 
 		/*
@@ -5263,7 +5262,7 @@ int __rte_cold
 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint64_t bus_addr;
 	uint32_t rxctrl;
 	uint32_t fctrl;
@@ -5374,7 +5373,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
 		 * The value is in 1 KB resolution. Valid values can be from
 		 * 1 KB to 16 KB.
 		 */
-		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
+		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
 			RTE_PKTMBUF_HEADROOM);
 		srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
 			   IXGBE_SRRCTL_BSIZEPKT_MASK);
@@ -5559,7 +5558,7 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
 	struct ci_tx_queue *txq;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t txdctl;
 	uint32_t dmatxctl;
 	uint32_t rxctrl;
@@ -5646,7 +5645,7 @@ int __rte_cold
 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ixgbe_hw     *hw;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t rxdctl;
 	int poll_ms;
 
@@ -5689,7 +5688,7 @@ ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ixgbe_hw     *hw;
 	struct ixgbe_adapter *adapter = dev->data->dev_private;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t rxdctl;
 	int poll_ms;
 
@@ -5823,11 +5822,11 @@ void
 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
-	qinfo->mp = rxq->mb_pool;
+	qinfo->mp = rxq->mp;
 	qinfo->scattered_rx = dev->data->scattered_rx;
 	qinfo->nb_desc = rxq->nb_rx_desc;
 
@@ -5861,13 +5860,13 @@ void
 ixgbe_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ixgbe_adapter *adapter = dev->data->dev_private;
 
 	rxq = dev->data->rx_queues[queue_id];
 
 	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
-	recycle_rxq_info->mp = rxq->mb_pool;
+	recycle_rxq_info->mp = rxq->mp;
 	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
 	recycle_rxq_info->receive_tail = &rxq->rx_tail;
 
@@ -5889,7 +5888,7 @@ int __rte_cold
 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
 	uint32_t frame_size = dev->data->mtu + IXGBE_ETH_OVERHEAD;
 	uint64_t bus_addr;
@@ -5972,7 +5971,7 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 		 * The value is in 1 KB resolution. Valid values can be from
 		 * 1 KB to 16 KB.
 		 */
-		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
+		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
 			RTE_PKTMBUF_HEADROOM);
 		srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
 			   IXGBE_SRRCTL_BSIZEPKT_MASK);
@@ -6076,7 +6075,7 @@ ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw     *hw;
 	struct ci_tx_queue *txq;
-	struct ixgbe_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t txdctl;
 	uint32_t rxdctl;
 	uint16_t i;
@@ -6270,7 +6269,7 @@ ixgbe_recv_scattered_pkts_vec(
 }
 
 int
-ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
+ixgbe_rxq_vec_setup(struct ci_rx_queue __rte_unused * rxq)
 {
 	return -1;
 }
@@ -6290,7 +6289,7 @@ ixgbe_txq_vec_setup(struct ci_tx_queue *txq __rte_unused)
 }
 
 void
-ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
+ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue __rte_unused * rxq)
 {
 	return;
 }
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index 20a5c5a0af..84e28eb254 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -5,6 +5,7 @@
 #ifndef _IXGBE_RXTX_H_
 #define _IXGBE_RXTX_H_
 
+#include "../common/rx.h"
 #include "../common/tx.h"
 
 /*
@@ -30,7 +31,7 @@
 #define	IXGBE_MAX_RING_DESC	8192
 
 #define RTE_PMD_IXGBE_TX_MAX_BURST 32
-#define RTE_PMD_IXGBE_RX_MAX_BURST 32
+#define RTE_PMD_IXGBE_RX_MAX_BURST CI_RX_MAX_BURST
 #define RTE_IXGBE_TX_MAX_FREE_BUF_SZ 64
 
 #define RTE_IXGBE_DESCS_PER_LOOP    4
@@ -66,66 +67,6 @@
 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
 #define IXGBE_PACKET_TYPE_SHIFT             0X04
 
-/**
- * Structure associated with each descriptor of the RX ring of a RX queue.
- */
-struct ixgbe_rx_entry {
-	struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
-};
-
-struct ixgbe_scattered_rx_entry {
-	struct rte_mbuf *fbuf; /**< First segment of the fragmented packet. */
-};
-
-/**
- * Structure associated with each RX queue.
- */
-struct ixgbe_rx_queue {
-	struct rte_mempool  *mb_pool; /**< mbuf pool to populate RX ring. */
-	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
-	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
-	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
-	struct ixgbe_rx_entry *sw_ring; /**< address of RX software ring. */
-	struct ixgbe_scattered_rx_entry *sw_sc_ring; /**< address of scattered Rx software ring. */
-	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
-	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
-	uint64_t            mbuf_initializer; /**< value to init mbufs */
-	uint16_t            nb_rx_desc; /**< number of RX descriptors. */
-	uint16_t            rx_tail;  /**< current value of RDT register. */
-	uint16_t            nb_rx_hold; /**< number of held free RX desc. */
-	uint16_t rx_nb_avail; /**< nr of staged pkts ready to ret to app */
-	uint16_t rx_next_avail; /**< idx of next staged pkt to ret to app */
-	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	uint8_t            rx_using_sse;
-	/**< indicates that vector RX is in use */
-#ifdef RTE_LIB_SECURITY
-	uint8_t            using_ipsec;
-	/**< indicates that IPsec RX feature is in use */
-#endif
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
-	uint16_t            rxrearm_nb;     /**< number of remaining to be re-armed */
-	uint16_t            rxrearm_start;  /**< the idx we start the re-arming from */
-#endif
-	uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
-	uint16_t            queue_id; /**< RX queue index. */
-	uint16_t            reg_idx;  /**< RX queue register index. */
-	uint16_t            pkt_type_mask;  /**< Packet type mask for different NICs. */
-	uint16_t            port_id;  /**< Device port identifier. */
-	uint8_t             crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
-	uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
-	uint8_t             rx_deferred_start; /**< not in global dev start. */
-	/** UDP frames with a 0 checksum can be marked as checksum errors. */
-	uint8_t             rx_udp_csum_zero_err;
-	/** flags to set in mbuf when a vlan is detected. */
-	uint64_t            vlan_flags;
-	uint64_t	    offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
-	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
-	struct rte_mbuf fake_mbuf;
-	/** hold packets to return to application */
-	struct rte_mbuf *rx_stage[RTE_PMD_IXGBE_RX_MAX_BURST*2];
-	const struct rte_memzone *mz;
-};
-
 /**
  * IXGBE CTX Constants
  */
@@ -230,8 +171,8 @@ uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 uint16_t ixgbe_recv_scattered_pkts_vec(void *rx_queue,
 		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
 int ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);
-int ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq);
-void ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq);
+int ixgbe_rxq_vec_setup(struct ci_rx_queue *rxq);
+void ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq);
 int ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt);
 
 extern const uint32_t ptype_table[IXGBE_PACKET_TYPE_MAX];
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
index 018010820f..0ba3d7a4c0 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h
@@ -69,7 +69,7 @@ ixgbe_tx_free_bufs(struct ci_tx_queue *txq)
 }
 
 static inline void
-_ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
+_ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	unsigned int i;
 
@@ -173,7 +173,7 @@ ixgbe_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
 		return -1;
 
 	for (uint16_t i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		if (!rxq)
 			continue;
 		if (!ci_rxq_vec_capable(rxq->nb_rx_desc, rxq->rx_free_thresh, rxq->offloads))
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
index 9ccd8eba25..630a2e6a1d 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -12,22 +12,22 @@
 #include "ixgbe_rxtx_vec_common.h"
 
 static inline void
-ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	uint64x2_t dma_addr0, dma_addr1;
 	uint64x2_t zero = vdupq_n_u64(0);
 	uint64_t paddr;
 	uint8x8_t p;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mb_pool,
+	if (unlikely(rte_mempool_get_bulk(rxq->mp,
 					  (void *)rxep,
 					  RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
 		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
@@ -76,7 +76,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
+	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
 }
 
 static inline void
@@ -282,11 +282,11 @@ desc_to_ptype_v(uint64x2_t descs[4], uint16_t pkt_type_mask,
  * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint8x16_t shuf_msk = {
@@ -309,7 +309,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rx_tail;
 
 	rte_prefetch_non_temporal(rxdp);
 
@@ -488,7 +488,7 @@ static uint16_t
 ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -634,7 +634,7 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_ixgbe_rx_queue_release_mbufs_vec(rxq);
 }
@@ -657,7 +657,7 @@ static const struct ixgbe_txq_ops vec_txq_ops = {
 };
 
 int __rte_cold
-ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
index e125f52cc5..ecfb0d6ba6 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -13,12 +13,12 @@
 #include <rte_vect.h>
 
 static inline void
-ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
@@ -26,10 +26,10 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 
 	const __m128i hba_msk = _mm_set_epi64x(0, UINT64_MAX);
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mb_pool,
+	if (rte_mempool_get_bulk(rxq->mp,
 				 (void *)rxep,
 				 RTE_IXGBE_RXQ_REARM_THRESH) < 0) {
 		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
@@ -86,7 +86,7 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
+	IXGBE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 }
 
 #ifdef RTE_LIB_SECURITY
@@ -327,11 +327,11 @@ desc_to_ptype_v(__m128i descs[4], uint16_t pkt_type_mask,
  * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ixgbe_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 #ifdef RTE_LIB_SECURITY
 	uint8_t use_ipsec = rxq->using_ipsec;
@@ -377,7 +377,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = rxq->ixgbe_rx_ring + rxq->rx_tail;
 
 	rte_prefetch0(rxdp);
 
@@ -609,7 +609,7 @@ static uint16_t
 ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts)
 {
-	struct ixgbe_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -755,7 +755,7 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
+ixgbe_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_ixgbe_rx_queue_release_mbufs_vec(rxq);
 }
@@ -778,7 +778,7 @@ static const struct ixgbe_txq_ops vec_txq_ops = {
 };
 
 int __rte_cold
-ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
+ixgbe_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
-- 
2.47.1



* [PATCH v1 04/13] net/i40e: use the common Rx queue structure
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 05/13] net/ice: " Anatoly Burakov
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes

Make the i40e driver use the new common Rx queue structure.

Because the i40e driver supports both 16-byte and 32-byte descriptor formats
(controlled by the RTE_LIBRTE_I40E_16BYTE_RX_DESC define), the common queue
structure has to take that into account: the ring union carries pointers for
both formats, and the actual descriptor format is picked by i40e at compile
time using the above macro. Direct access to the Rx ring is therefore replaced
with macro access that takes the descriptor size into account.
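
For illustration, the macro-based access might look roughly like the sketch
below. The real definitions live in i40e_rxtx.h and are not shown in the hunks
here, so the #define bodies are an assumption based on the ring union members
added to the common structure; the usage line is taken from the hunks below.

    /* hypothetical sketch -- actual definitions are in i40e_rxtx.h */
    #ifdef RTE_LIBRTE_I40E_16BYTE_RX_DESC
    #define I40E_RX_RING(rxq)           ((rxq)->i40e_rx_16b_ring)
    #else
    #define I40E_RX_RING(rxq)           ((rxq)->i40e_rx_32b_ring)
    #endif
    #define I40E_RX_RING_PTR(rxq, idx)  (&I40E_RX_RING(rxq)[(idx)])

    /* usage */
    rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);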

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h                 |  14 ++
 drivers/net/intel/i40e/i40e_ethdev.c          |   4 +-
 drivers/net/intel/i40e/i40e_ethdev.h          |   4 +-
 drivers/net/intel/i40e/i40e_fdir.c            |  16 +--
 .../i40e/i40e_recycle_mbufs_vec_common.c      |   6 +-
 drivers/net/intel/i40e/i40e_rxtx.c            | 126 +++++++++---------
 drivers/net/intel/i40e/i40e_rxtx.h            |  74 +++-------
 drivers/net/intel/i40e/i40e_rxtx_common_avx.h |   6 +-
 .../net/intel/i40e/i40e_rxtx_vec_altivec.c    |  22 +--
 drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |  12 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |  12 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_common.h |   4 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   |  24 ++--
 drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    |  24 ++--
 14 files changed, 160 insertions(+), 188 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index 524de39f9c..db49db57d0 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -30,6 +30,8 @@ struct ci_rx_queue {
 	struct rte_mempool  *mp; /**< mbuf pool to populate RX ring. */
 	union { /* RX ring virtual address */
 		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
+		volatile union i40e_16byte_rx_desc *i40e_rx_16b_ring;
+		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
@@ -51,14 +53,22 @@ struct ci_rx_queue {
 	uint16_t queue_id; /**< RX queue index. */
 	uint16_t port_id;  /**< Device port identifier. */
 	uint16_t reg_idx;  /**< RX queue register index. */
+	uint16_t rx_buf_len; /* The packet buffer size */
+	uint16_t rx_hdr_len; /* The header buffer size */
+	uint16_t max_pkt_len; /* Maximum packet length */
 	uint8_t crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
+	bool q_set; /**< indicate if rx queue has been configured */
 	bool rx_deferred_start; /**< queue is not started on dev start. */
+	bool fdir_enabled; /* 0 if FDIR disabled, 1 when enabled */
 	bool vector_rx; /**< indicates that vector RX is in use */
 	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
 	uint64_t mbuf_initializer; /**< value to init mbufs */
 	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
 	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
 	struct rte_mbuf fake_mbuf;
+	union { /* the VSI this queue belongs to */
+		struct i40e_vsi *i40e_vsi;
+	};
 	const struct rte_memzone *mz;
 	union {
 		struct { /* ixgbe specific values */
@@ -71,6 +81,10 @@ struct ci_rx_queue {
 			/** flags to set in mbuf when a vlan is detected. */
 			uint64_t vlan_flags;
 		};
+		struct { /* i40e specific values */
+			uint8_t hs_mode; /**< Header Split mode */
+			uint8_t dcb_tc; /**< Traffic class of rx queue */
+		};
 	};
 };
 
diff --git a/drivers/net/intel/i40e/i40e_ethdev.c b/drivers/net/intel/i40e/i40e_ethdev.c
index 90eba3419f..e0a865845b 100644
--- a/drivers/net/intel/i40e/i40e_ethdev.c
+++ b/drivers/net/intel/i40e/i40e_ethdev.c
@@ -6609,7 +6609,7 @@ i40e_dev_rx_init(struct i40e_pf *pf)
 	struct rte_eth_dev_data *data = pf->dev_data;
 	int ret = I40E_SUCCESS;
 	uint16_t i;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	i40e_pf_config_rss(pf);
 	for (i = 0; i < data->nb_rx_queues; i++) {
@@ -8974,7 +8974,7 @@ i40e_pf_calc_configured_queues_num(struct i40e_pf *pf)
 {
 	struct rte_eth_dev_data *data = pf->dev_data;
 	int i, num;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	num = 0;
 	for (i = 0; i < pf->lan_nb_qps; i++) {
diff --git a/drivers/net/intel/i40e/i40e_ethdev.h b/drivers/net/intel/i40e/i40e_ethdev.h
index ccc8732d7d..44864292d0 100644
--- a/drivers/net/intel/i40e/i40e_ethdev.h
+++ b/drivers/net/intel/i40e/i40e_ethdev.h
@@ -333,7 +333,7 @@ struct i40e_vsi_list {
 	struct i40e_vsi *vsi;
 };
 
-struct i40e_rx_queue;
+struct ci_rx_queue;
 struct ci_tx_queue;
 
 /* Bandwidth limit information */
@@ -739,7 +739,7 @@ struct i40e_fdir_info {
 	struct i40e_vsi *fdir_vsi;     /* pointer to fdir VSI structure */
 	uint16_t match_counter_index;  /* Statistic counter index used for fdir*/
 	struct ci_tx_queue *txq;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	void *prg_pkt[I40E_FDIR_PRG_PKT_CNT];     /* memory for fdir program packet */
 	uint64_t dma_addr[I40E_FDIR_PRG_PKT_CNT]; /* physic address of packet memory*/
 	/*
diff --git a/drivers/net/intel/i40e/i40e_fdir.c b/drivers/net/intel/i40e/i40e_fdir.c
index 94e3ab44e3..eadcf63d1d 100644
--- a/drivers/net/intel/i40e/i40e_fdir.c
+++ b/drivers/net/intel/i40e/i40e_fdir.c
@@ -100,9 +100,9 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 				  bool add, bool wait_status);
 
 static int
-i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
+i40e_fdir_rx_queue_init(struct ci_rx_queue *rxq)
 {
-	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
+	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->i40e_vsi);
 	struct i40e_hmc_obj_rxq rx_ctx;
 	int err = I40E_SUCCESS;
 
@@ -139,7 +139,7 @@ i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
 		return err;
 	}
 	rxq->qrx_tail = hw->hw_addr +
-		I40E_QRX_TAIL(rxq->vsi->base_queue);
+		I40E_QRX_TAIL(rxq->i40e_vsi->base_queue);
 
 	rte_wmb();
 	/* Init the RX tail register. */
@@ -382,7 +382,7 @@ i40e_fdir_rx_proc_enable(struct rte_eth_dev *dev, bool on)
 	int32_t i;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		if (!rxq)
 			continue;
 		rxq->fdir_enabled = on;
@@ -929,7 +929,7 @@ i40e_build_ctob(uint32_t td_cmd,
  * tx queue
  */
 static inline int
-i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
+i40e_check_fdir_programming_status(struct ci_rx_queue *rxq)
 {
 	volatile union i40e_rx_desc *rxdp;
 	uint64_t qword1;
@@ -938,7 +938,7 @@ i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
 	uint32_t error;
 	int ret = 0;
 
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
 	rx_status = (qword1 & I40E_RXD_QW1_STATUS_MASK)
 			>> I40E_RXD_QW1_STATUS_SHIFT;
@@ -987,7 +987,7 @@ i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
 }
 
 static inline void
-i40e_fdir_programming_status_cleanup(struct i40e_rx_queue *rxq)
+i40e_fdir_programming_status_cleanup(struct ci_rx_queue *rxq)
 {
 	uint16_t retry_count = 0;
 
@@ -1627,7 +1627,7 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 				  bool add, bool wait_status)
 {
 	struct ci_tx_queue *txq = pf->fdir.txq;
-	struct i40e_rx_queue *rxq = pf->fdir.rxq;
+	struct ci_rx_queue *rxq = pf->fdir.rxq;
 	const struct i40e_fdir_action *fdir_action = &filter->action;
 	volatile struct i40e_tx_desc *txdp;
 	volatile struct i40e_filter_program_desc *fdirdp;
diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
index 2875c578af..aa7703216d 100644
--- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
@@ -13,15 +13,15 @@
 void
 i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
-	struct i40e_rx_entry *rxep;
+	struct ci_rx_queue *rxq = rx_queue;
+	struct ci_rx_entry *rxep;
 	volatile union i40e_rx_desc *rxdp;
 	uint16_t rx_id;
 	uint64_t paddr;
 	uint64_t dma_addr;
 	uint16_t i;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 	rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
 	for (i = 0; i < nb_mbufs; i++) {
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index c3ff2e05c3..96490296ba 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -94,12 +94,12 @@ i40e_monitor_callback(const uint64_t value,
 int
 i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile union i40e_rx_desc *rxdp;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = I40E_RX_RING_PTR(rxq, desc);
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.qword1.status_error_len;
 
@@ -416,9 +416,9 @@ i40e_xmit_cleanup(struct ci_tx_queue *txq)
 
 static inline int
 #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
-check_rx_burst_bulk_alloc_preconditions(struct i40e_rx_queue *rxq)
+check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
 #else
-check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq)
+check_rx_burst_bulk_alloc_preconditions(__rte_unused struct ci_rx_queue *rxq)
 #endif
 {
 	int ret = 0;
@@ -456,10 +456,10 @@ check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq)
 #error "PMD I40E: I40E_LOOK_AHEAD must be 8\n"
 #endif
 static inline int
-i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
+i40e_rx_scan_hw_ring(struct ci_rx_queue *rxq)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t pkt_len;
 	uint64_t qword1;
@@ -467,9 +467,9 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
 	int32_t s[I40E_LOOK_AHEAD], var, nb_dd;
 	int32_t i, j, nb_rx = 0;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
@@ -558,7 +558,7 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
 }
 
 static inline uint16_t
-i40e_rx_fill_from_stage(struct i40e_rx_queue *rxq,
+i40e_rx_fill_from_stage(struct ci_rx_queue *rxq,
 			struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
@@ -577,10 +577,10 @@ i40e_rx_fill_from_stage(struct i40e_rx_queue *rxq,
 }
 
 static inline int
-i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
+i40e_rx_alloc_bufs(struct ci_rx_queue *rxq)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx, i;
 	uint64_t dma_addr;
@@ -597,7 +597,7 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
 		return -ENOMEM;
 	}
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = I40E_RX_RING_PTR(rxq, alloc_idx);
 	for (i = 0; i < rxq->rx_free_thresh; i++) {
 		if (likely(i < (rxq->rx_free_thresh - 1)))
 			/* Prefetch next mbuf */
@@ -629,7 +629,7 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
 static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = (struct i40e_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	struct rte_eth_dev *dev;
 	uint16_t nb_rx = 0;
 
@@ -648,7 +648,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		if (i40e_rx_alloc_bufs(rxq) != 0) {
 			uint16_t i, j;
 
-			dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+			dev = I40E_VSI_TO_ETH_DEV(rxq->i40e_vsi);
 			dev->data->rx_mbuf_alloc_failed +=
 				rxq->rx_free_thresh;
 
@@ -707,12 +707,12 @@ i40e_recv_pkts_bulk_alloc(void __rte_unused *rx_queue,
 uint16_t
 i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	volatile union i40e_rx_desc *rx_ring;
 	volatile union i40e_rx_desc *rxdp;
 	union i40e_rx_desc rxd;
-	struct i40e_rx_entry *sw_ring;
-	struct i40e_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_eth_dev *dev;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
@@ -729,9 +729,9 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
+	rx_ring = I40E_RX_RING(rxq);
 	sw_ring = rxq->sw_ring;
-	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -745,7 +745,7 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+			dev = I40E_VSI_TO_ETH_DEV(rxq->i40e_vsi);
 			dev->data->rx_mbuf_alloc_failed++;
 			break;
 		}
@@ -837,12 +837,12 @@ i40e_recv_scattered_pkts(void *rx_queue,
 			 struct rte_mbuf **rx_pkts,
 			 uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
-	volatile union i40e_rx_desc *rx_ring = rxq->rx_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union i40e_rx_desc *rx_ring = I40E_RX_RING(rxq);
 	volatile union i40e_rx_desc *rxdp;
 	union i40e_rx_desc rxd;
-	struct i40e_rx_entry *sw_ring = rxq->sw_ring;
-	struct i40e_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb, *rxm;
@@ -853,7 +853,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
 	uint64_t qword1;
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -867,7 +867,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
 
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+			dev = I40E_VSI_TO_ETH_DEV(rxq->i40e_vsi);
 			dev->data->rx_mbuf_alloc_failed++;
 			break;
 		}
@@ -1798,7 +1798,7 @@ i40e_get_queue_offset_by_qindex(struct i40e_pf *pf, uint16_t queue_idx)
 int
 i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -1841,7 +1841,7 @@ i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 int
 i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -2004,7 +2004,7 @@ i40e_dev_first_queue(uint16_t idx, void **queues, int num)
 
 static int
 i40e_dev_rx_queue_setup_runtime(struct rte_eth_dev *dev,
-				struct i40e_rx_queue *rxq)
+				struct ci_rx_queue *rxq)
 {
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
@@ -2081,7 +2081,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct i40e_vsi *vsi;
 	struct i40e_pf *pf = NULL;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz;
 	uint32_t ring_size;
 	uint16_t len, i;
@@ -2116,7 +2116,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate the rx queue data structure */
 	rxq = rte_zmalloc_socket("i40e rx queue",
-				 sizeof(struct i40e_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 socket_id);
 	if (!rxq) {
@@ -2135,7 +2135,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	else
 		rxq->crc_len = 0;
 	rxq->drop_en = rx_conf->rx_drop_en;
-	rxq->vsi = vsi;
+	rxq->i40e_vsi = vsi;
 	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 	rxq->offloads = offloads;
 
@@ -2164,14 +2164,14 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	memset(rz->addr, 0, ring_size);
 
 	rxq->rx_ring_phys_addr = rz->iova;
-	rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
+	I40E_RX_RING(rxq) = (union i40e_rx_desc *)rz->addr;
 
 	len = (uint16_t)(nb_desc + RTE_PMD_I40E_RX_MAX_BURST);
 
 	/* Allocate the software ring. */
 	rxq->sw_ring =
 		rte_zmalloc_socket("i40e rx sw ring",
-				   sizeof(struct i40e_rx_entry) * len,
+				   sizeof(struct ci_rx_entry) * len,
 				   RTE_CACHE_LINE_SIZE,
 				   socket_id);
 	if (!rxq->sw_ring) {
@@ -2242,7 +2242,7 @@ i40e_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 void
 i40e_rx_queue_release(void *rxq)
 {
-	struct i40e_rx_queue *q = (struct i40e_rx_queue *)rxq;
+	struct ci_rx_queue *q = (struct ci_rx_queue *)rxq;
 
 	if (!q) {
 		PMD_DRV_LOG(DEBUG, "Pointer to rxq is NULL");
@@ -2260,11 +2260,11 @@ i40e_dev_rx_queue_count(void *rx_queue)
 {
 #define I40E_RXQ_SCAN_INTERVAL 4
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &(rxq->rx_ring[rxq->rx_tail]);
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	while ((desc < rxq->nb_rx_desc) &&
 		((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
 		I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT) &
@@ -2277,8 +2277,8 @@ i40e_dev_rx_queue_count(void *rx_queue)
 		desc += I40E_RXQ_SCAN_INTERVAL;
 		rxdp += I40E_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
-					desc - rxq->nb_rx_desc]);
+			rxdp = I40E_RX_RING_PTR(rxq,
+					rxq->rx_tail + desc - rxq->nb_rx_desc);
 	}
 
 	return desc;
@@ -2287,7 +2287,7 @@ i40e_dev_rx_queue_count(void *rx_queue)
 int
 i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile uint64_t *status;
 	uint64_t mask;
 	uint32_t desc;
@@ -2302,7 +2302,7 @@ i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	status = &rxq->rx_ring[desc].wb.qword1.status_error_len;
+	status = &I40E_RX_RING_PTR(rxq, desc)->wb.qword1.status_error_len;
 	mask = rte_le_to_cpu_64((1ULL << I40E_RX_DESC_STATUS_DD_SHIFT)
 		<< I40E_RXD_QW1_STATUS_SHIFT);
 	if (*status & mask)
@@ -2628,12 +2628,12 @@ i40e_memzone_reserve(const char *name, uint32_t len, int socket_id)
 }
 
 void
-i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs(struct ci_rx_queue *rxq)
 {
 	uint16_t i;
 
 	/* SSE Vector driver has a different way of releasing mbufs. */
-	if (rxq->rx_using_sse) {
+	if (rxq->vector_rx) {
 		i40e_rx_queue_release_mbufs_vec(rxq);
 		return;
 	}
@@ -2663,7 +2663,7 @@ i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
 }
 
 void
-i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
+i40e_reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	unsigned i;
 	uint16_t len;
@@ -2681,7 +2681,7 @@ i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
 		len = rxq->nb_rx_desc;
 
 	for (i = 0; i < len * sizeof(union i40e_rx_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)I40E_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 	for (i = 0; i < RTE_PMD_I40E_RX_MAX_BURST; ++i)
@@ -2898,9 +2898,9 @@ i40e_tx_queue_init(struct ci_tx_queue *txq)
 }
 
 int
-i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
+i40e_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq)
 {
-	struct i40e_rx_entry *rxe = rxq->sw_ring;
+	struct ci_rx_entry *rxe = rxq->sw_ring;
 	uint64_t dma_addr;
 	uint16_t i;
 
@@ -2922,7 +2922,7 @@ i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
-		rxd = &rxq->rx_ring[i];
+		rxd = I40E_RX_RING_PTR(rxq, i);
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
@@ -2941,10 +2941,10 @@ i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
  * and maximum packet length.
  */
 static int
-i40e_rx_queue_config(struct i40e_rx_queue *rxq)
+i40e_rx_queue_config(struct ci_rx_queue *rxq)
 {
-	struct i40e_pf *pf = I40E_VSI_TO_PF(rxq->vsi);
-	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
+	struct i40e_pf *pf = I40E_VSI_TO_PF(rxq->i40e_vsi);
+	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->i40e_vsi);
 	struct rte_eth_dev_data *data = pf->dev_data;
 	uint16_t buf_size;
 
@@ -2988,11 +2988,11 @@ i40e_rx_queue_config(struct i40e_rx_queue *rxq)
 
 /* Init the RX queue in hardware */
 int
-i40e_rx_queue_init(struct i40e_rx_queue *rxq)
+i40e_rx_queue_init(struct ci_rx_queue *rxq)
 {
 	int err = I40E_SUCCESS;
-	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
-	struct rte_eth_dev_data *dev_data = I40E_VSI_TO_DEV_DATA(rxq->vsi);
+	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->i40e_vsi);
+	struct rte_eth_dev_data *dev_data = I40E_VSI_TO_DEV_DATA(rxq->i40e_vsi);
 	uint16_t pf_q = rxq->reg_idx;
 	uint16_t buf_size;
 	struct i40e_hmc_obj_rxq rx_ctx;
@@ -3166,7 +3166,7 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
 enum i40e_status_code
 i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz = NULL;
 	uint32_t ring_size;
 	struct rte_eth_dev *dev;
@@ -3180,7 +3180,7 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 
 	/* Allocate the RX queue data structure. */
 	rxq = rte_zmalloc_socket("i40e fdir rx queue",
-				  sizeof(struct i40e_rx_queue),
+				  sizeof(struct ci_rx_queue),
 				  RTE_CACHE_LINE_SIZE,
 				  SOCKET_ID_ANY);
 	if (!rxq) {
@@ -3206,11 +3206,11 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 	rxq->nb_rx_desc = I40E_FDIR_NUM_RX_DESC;
 	rxq->queue_id = I40E_FDIR_QUEUE_ID;
 	rxq->reg_idx = pf->fdir.fdir_vsi->base_queue;
-	rxq->vsi = pf->fdir.fdir_vsi;
+	rxq->i40e_vsi = pf->fdir.fdir_vsi;
 
 	rxq->rx_ring_phys_addr = rz->iova;
 	memset(rz->addr, 0, I40E_FDIR_NUM_RX_DESC * sizeof(union i40e_rx_desc));
-	rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
+	I40E_RX_RING(rxq) = (union i40e_rx_desc *)rz->addr;
 
 	/*
 	 * Don't need to allocate software ring and reset for the fdir
@@ -3226,7 +3226,7 @@ void
 i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -3264,7 +3264,7 @@ void
 i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
-	struct i40e_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 
@@ -3335,7 +3335,7 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 		}
 		if (ad->rx_vec_allowed) {
 			for (i = 0; i < dev->data->nb_rx_queues; i++) {
-				struct i40e_rx_queue *rxq =
+				struct ci_rx_queue *rxq =
 					dev->data->rx_queues[i];
 
 				if (rxq && i40e_rxq_vec_setup(rxq)) {
@@ -3438,10 +3438,10 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 			 dev->rx_pkt_burst == i40e_recv_pkts_vec_avx2);
 
 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
-			struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+			struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 
 			if (rxq)
-				rxq->rx_using_sse = rx_using_sse;
+				rxq->vector_rx = rx_using_sse;
 		}
 	}
 }
diff --git a/drivers/net/intel/i40e/i40e_rxtx.h b/drivers/net/intel/i40e/i40e_rxtx.h
index 2f32fc5686..4b5a84d8ef 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx.h
@@ -6,8 +6,9 @@
 #define _I40E_RXTX_H_
 
 #include "../common/tx.h"
+#include "../common/rx.h"
 
-#define RTE_PMD_I40E_RX_MAX_BURST 32
+#define RTE_PMD_I40E_RX_MAX_BURST CI_RX_MAX_BURST
 #define RTE_PMD_I40E_TX_MAX_BURST 32
 
 #define RTE_I40E_VPMD_RX_BURST        32
@@ -67,62 +68,19 @@ enum i40e_header_split_mode {
 			       I40E_HEADER_SPLIT_UDP_TCP | \
 			       I40E_HEADER_SPLIT_SCTP)
 
-/* HW desc structure, both 16-byte and 32-byte types are supported */
+/* HW desc structures, both 16-byte and 32-byte types are supported */
 #ifdef RTE_LIBRTE_I40E_16BYTE_RX_DESC
 #define i40e_rx_desc i40e_16byte_rx_desc
+#define I40E_RX_RING(rxq) \
+	((rxq)->i40e_rx_16b_ring)
 #else
 #define i40e_rx_desc i40e_32byte_rx_desc
+#define I40E_RX_RING(rxq) \
+	((rxq)->i40e_rx_32b_ring)
 #endif
 
-struct i40e_rx_entry {
-	struct rte_mbuf *mbuf;
-};
-
-/*
- * Structure associated with each RX queue.
- */
-struct i40e_rx_queue {
-	struct rte_mempool *mp; /**< mbuf pool to populate RX ring */
-	volatile union i40e_rx_desc *rx_ring;/**< RX ring virtual address */
-	uint64_t rx_ring_phys_addr; /**< RX ring DMA address */
-	struct i40e_rx_entry *sw_ring; /**< address of RX soft ring */
-	uint16_t nb_rx_desc; /**< number of RX descriptors */
-	uint16_t rx_free_thresh; /**< max free RX desc to hold */
-	uint16_t rx_tail; /**< current value of tail */
-	uint16_t nb_rx_hold; /**< number of held free RX desc */
-	struct rte_mbuf *pkt_first_seg; /**< first segment of current packet */
-	struct rte_mbuf *pkt_last_seg; /**< last segment of current packet */
-	struct rte_mbuf fake_mbuf; /**< dummy mbuf */
-#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
-	uint16_t rx_nb_avail; /**< number of staged packets ready */
-	uint16_t rx_next_avail; /**< index of next staged packets */
-	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	struct rte_mbuf *rx_stage[RTE_PMD_I40E_RX_MAX_BURST * 2];
-#endif
-
-	uint16_t rxrearm_nb;	/**< number of remaining to be re-armed */
-	uint16_t rxrearm_start;	/**< the idx we start the re-arming from */
-	uint64_t mbuf_initializer; /**< value to init mbufs */
-
-	uint16_t port_id; /**< device port ID */
-	uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise */
-	uint8_t fdir_enabled; /**< 0 if FDIR disabled, 1 when enabled */
-	uint16_t queue_id; /**< RX queue index */
-	uint16_t reg_idx; /**< RX queue register index */
-	uint8_t drop_en; /**< if not 0, set register bit */
-	volatile uint8_t *qrx_tail; /**< register address of tail */
-	struct i40e_vsi *vsi; /**< the VSI this queue belongs to */
-	uint16_t rx_buf_len; /* The packet buffer size */
-	uint16_t rx_hdr_len; /* The header buffer size */
-	uint16_t max_pkt_len; /* Maximum packet length */
-	uint8_t hs_mode; /* Header Split mode */
-	bool q_set; /**< indicate if rx queue has been configured */
-	bool rx_deferred_start; /**< don't start this queue in dev start */
-	uint16_t rx_using_sse; /**<flag indicate the usage of vPMD for rx */
-	uint8_t dcb_tc;         /**< Traffic class of rx queue */
-	uint64_t offloads; /**< Rx offload flags of RTE_ETH_RX_OFFLOAD_* */
-	const struct rte_memzone *mz;
-};
+#define I40E_RX_RING_PTR(rxq, entry) \
+	(I40E_RX_RING(rxq) + (entry))
 
 /** Offload features */
 union i40e_tx_offload {
@@ -172,16 +130,16 @@ uint16_t i40e_simple_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t i40e_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
 int i40e_tx_queue_init(struct ci_tx_queue *txq);
-int i40e_rx_queue_init(struct i40e_rx_queue *rxq);
+int i40e_rx_queue_init(struct ci_rx_queue *rxq);
 void i40e_free_tx_resources(struct ci_tx_queue *txq);
-void i40e_free_rx_resources(struct i40e_rx_queue *rxq);
+void i40e_free_rx_resources(struct ci_rx_queue *rxq);
 void i40e_dev_clear_queues(struct rte_eth_dev *dev);
 void i40e_dev_free_queues(struct rte_eth_dev *dev);
-void i40e_reset_rx_queue(struct i40e_rx_queue *rxq);
+void i40e_reset_rx_queue(struct ci_rx_queue *rxq);
 void i40e_reset_tx_queue(struct ci_tx_queue *txq);
 int i40e_tx_done_cleanup(void *txq, uint32_t free_cnt);
-int i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq);
-void i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq);
+int i40e_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq);
+void i40e_rx_queue_release_mbufs(struct ci_rx_queue *rxq);
 
 uint32_t i40e_dev_rx_queue_count(void *rx_queue);
 int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset);
@@ -197,9 +155,9 @@ uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue,
 				      struct rte_mbuf **rx_pkts,
 				      uint16_t nb_pkts);
 int i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);
-int i40e_rxq_vec_setup(struct i40e_rx_queue *rxq);
+int i40e_rxq_vec_setup(struct ci_rx_queue *rxq);
 int i40e_txq_vec_setup(struct ci_tx_queue *txq);
-void i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq);
+void i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq);
 uint16_t i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				   uint16_t nb_pkts);
 void i40e_set_rx_function(struct rte_eth_dev *dev);
diff --git a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
index b66a808f9f..fd9447014b 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
@@ -13,14 +13,14 @@
 
 #ifdef __AVX2__
 static __rte_always_inline void
-i40e_rxq_rearm_common(struct i40e_rx_queue *rxq, __rte_unused bool avx512)
+i40e_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
index 42beff6e89..3e4109e82e 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
@@ -16,13 +16,13 @@
 #include <rte_altivec.h>
 
 static inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
 
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 
 	__vector unsigned long hdr_room = (__vector unsigned long){
@@ -30,7 +30,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 						RTE_PKTMBUF_HEADROOM};
 	__vector unsigned long dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING(rxq) + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -195,16 +195,16 @@ desc_to_ptype_v(__vector unsigned long descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
 	__vector unsigned char shuf_msk;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	__vector unsigned short crc_adjust = (__vector unsigned short){
 		0, 0,         /* ignore pkt_type field */
@@ -221,7 +221,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -465,7 +465,7 @@ static uint16_t
 i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -611,15 +611,15 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+i40e_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
-	rxq->rx_using_sse = 1;
+	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
 }
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index 9c406e7a6f..0f3f7430aa 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -16,7 +16,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	i40e_rxq_rearm_common(rxq, false);
 }
@@ -105,16 +105,16 @@ desc_fdir_processing_32b(volatile union i40e_rx_desc *rxdp,
 
 /* Force inline as some compilers will not inline by default. */
 static __rte_always_inline uint16_t
-_recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec_avx2(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts, uint8_t *split_packet)
 {
 #define RTE_I40E_DESCS_PER_LOOP_AVX 8
 
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct i40e_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union i40e_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union i40e_rx_desc *rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 	rte_prefetch0(rxdp);
 
@@ -625,7 +625,7 @@ static uint16_t
 i40e_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index d8244556c0..f2292b45e8 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -18,7 +18,7 @@
 #define RTE_I40E_DESCS_PER_LOOP_AVX 8
 
 static __rte_always_inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	i40e_rxq_rearm_common(rxq, true);
 }
@@ -108,14 +108,14 @@ desc_fdir_processing_32b(volatile union i40e_rx_desc *rxdp,
 
 /* Force inline as some compilers will not inline by default. */
 static __rte_always_inline uint16_t
-_recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec_avx512(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			  uint16_t nb_pkts, uint8_t *split_packet)
 {
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct i40e_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union i40e_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union i40e_rx_desc *rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -693,7 +693,7 @@ i40e_recv_scattered_burst_vec_avx512(void *rx_queue,
 				     struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts)
 {
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_common.h b/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
index ba72df8e13..d19b9e4bf4 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
@@ -21,7 +21,7 @@ i40e_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
 }
 
 static inline void
-_i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+_i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	const unsigned mask = rxq->nb_rx_desc - 1;
 	unsigned i;
@@ -68,7 +68,7 @@ i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
 	 */
 	ad->rx_vec_allowed = true;
 	for (uint16_t i = 0; i < dev->data->nb_rx_queues; i++) {
-		struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+		struct ci_rx_queue *rxq = dev->data->rx_queues[i];
 		if (!rxq)
 			continue;
 		if (!ci_rxq_vec_capable(rxq->nb_rx_desc, rxq->rx_free_thresh, rxq->offloads)) {
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index d16ceb6b5d..814aa666dc 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -17,18 +17,18 @@
 #include "i40e_rxtx_vec_common.h"
 
 static inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	uint64x2_t dma_addr0, dma_addr1;
 	uint64x2_t zero = vdupq_n_u64(0);
 	uint64_t paddr;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (unlikely(rte_mempool_get_bulk(rxq->mp,
@@ -203,7 +203,7 @@ descs_to_fdir_16b(uint32x4_t fltstat, uint64x2_t descs[4], struct rte_mbuf **rx_
 #endif
 
 static inline void
-desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
+desc_to_olflags_v(struct ci_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
 		  uint64x2_t descs[4], struct rte_mbuf **rx_pkts)
 {
 	uint32x4_t vlan0, vlan1, rss, l3_l4e;
@@ -332,15 +332,15 @@ desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **__rte_restrict rx_pkts,
  * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
+_recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		   struct rte_mbuf **__rte_restrict rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	/* mask to shuffle from desc. to mbuf */
 	uint8x16_t shuf_msk = {
@@ -374,7 +374,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch_non_temporal(rxdp);
 
@@ -592,7 +592,7 @@ i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
 
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -738,15 +738,15 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
 }
 
 void __rte_cold
-i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+i40e_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
-	rxq->rx_using_sse = 1;
+	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
 }
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 774519265b..74cd59e245 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -15,18 +15,18 @@
 #include <rte_vect.h>
 
 static inline void
-i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
 	__m128i dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -207,7 +207,7 @@ descs_to_fdir_16b(__m128i fltstat, __m128i descs[4], struct rte_mbuf **rx_pkt)
 #endif
 
 static inline void
-desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
+desc_to_olflags_v(struct ci_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
 		  __m128i descs[4], struct rte_mbuf **rx_pkts)
 {
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -347,16 +347,16 @@ desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a RTE_I40E_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union i40e_rx_desc *rxdp;
-	struct i40e_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
 	__m128i shuf_msk;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->i40e_vsi->adapter->ptype_tbl;
 
 	__m128i crc_adjust = _mm_set_epi16(
 				0, 0, 0,    /* ignore non-length fields */
@@ -382,7 +382,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = I40E_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -609,7 +609,7 @@ i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
 
-	struct i40e_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -755,15 +755,15 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+i40e_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	_i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+i40e_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
-	rxq->rx_using_sse = 1;
+	rxq->vector_rx = 1;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
 	return 0;
 }
-- 
2.47.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v1 05/13] net/ice: use the common Rx queue structure
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (2 preceding siblings ...)
  2025-05-06 13:27 ` [PATCH v1 04/13] net/i40e: use the " Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 06/13] net/iavf: " Anatoly Burakov
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Bruce Richardson

Make the ice driver use the new common Rx queue structure.

Because the ice driver supports both 16-byte and 32-byte descriptor
formats (controlled by the RTE_LIBRTE_ICE_16BYTE_RX_DESC define), the
common queue structure has to take that into account, so it holds
pointers to both ring formats, while the format actually used is picked
by ice at compile time via that define. Direct accesses to the Rx ring
are now replaced with macro accessors that take the descriptor size
into account.
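
As a rough sketch of this mechanism (the macro names and union members
come from the changes in this patch, but the exact definitions shown
here are assumed to mirror the i40e macros from the previous patch; the
authoritative versions are part of the ice_rxtx.h changes):

    /* sketch: compile-time selection of the Rx descriptor ring format */
    #ifdef RTE_LIBRTE_ICE_16BYTE_RX_DESC
    #define ICE_RX_RING(rxq) ((rxq)->ice_rx_16b_ring)
    #else
    #define ICE_RX_RING(rxq) ((rxq)->ice_rx_32b_ring)
    #endif

    /* pointer to descriptor 'entry' within the selected ring */
    #define ICE_RX_RING_PTR(rxq, entry) (ICE_RX_RING(rxq) + (entry))

    /* callers use the accessors instead of rxq->rx_ring directly, e.g. */
    volatile union ice_rx_flex_desc *rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);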

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h               |  22 +++
 drivers/net/intel/ice/ice_dcf.c             |   6 +-
 drivers/net/intel/ice/ice_dcf_ethdev.c      |  22 +--
 drivers/net/intel/ice/ice_ethdev.c          |   2 +-
 drivers/net/intel/ice/ice_ethdev.h          |   5 +-
 drivers/net/intel/ice/ice_rxtx.c            | 158 ++++++++++----------
 drivers/net/intel/ice/ice_rxtx.h            |  78 ++--------
 drivers/net/intel/ice/ice_rxtx_common_avx.h |   6 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx2.c   |  14 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx512.c |  16 +-
 drivers/net/intel/ice/ice_rxtx_vec_common.h |   6 +-
 drivers/net/intel/ice/ice_rxtx_vec_sse.c    |  22 +--
 12 files changed, 163 insertions(+), 194 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index db49db57d0..b4836e7914 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -12,6 +12,7 @@
 
 #define CI_RX_BURST 32
 #define CI_RX_MAX_BURST 32
+#define CI_RX_MAX_NSEG 2
 
 struct ci_rx_queue;
 
@@ -23,6 +24,8 @@ struct ci_rx_entry_sc {
 	struct rte_mbuf *fbuf; /* First segment of the fragmented packet.*/
 };
 
+typedef void (*ci_rx_release_mbufs_t)(struct ci_rx_queue *rxq);
+
 /**
  * Structure associated with each RX queue.
  */
@@ -32,6 +35,8 @@ struct ci_rx_queue {
 		volatile union ixgbe_adv_rx_desc *ixgbe_rx_ring;
 		volatile union i40e_16byte_rx_desc *i40e_rx_16b_ring;
 		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
+		volatile union ice_16b_rx_flex_desc *ice_rx_16b_ring;
+		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
@@ -64,10 +69,16 @@ struct ci_rx_queue {
 	bool drop_en;  /**< if 1, drop packets if no descriptors are available. */
 	uint64_t mbuf_initializer; /**< value to init mbufs */
 	uint64_t offloads; /**< Rx offloads with RTE_ETH_RX_OFFLOAD_* */
+	uint32_t rxdid; /**< RX descriptor format ID. */
+	uint32_t proto_xtr; /* protocol extraction type */
+	uint64_t xtr_ol_flag; /* flexible descriptor metadata extraction offload flag */
+	off_t xtr_field_offs; /* Protocol extraction metadata offset */
+	uint64_t hw_time_update; /**< Last time HW timestamp was updated */
 	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
 	struct rte_mbuf fake_mbuf;
 	union { /* the VSI this queue belongs to */
 		struct i40e_vsi *i40e_vsi;
+		struct ice_vsi *ice_vsi;
 	};
 	const struct rte_memzone *mz;
 	union {
@@ -85,6 +96,17 @@ struct ci_rx_queue {
 			uint8_t hs_mode; /**< Header Split mode */
 			uint8_t dcb_tc; /**< Traffic class of rx queue */
 		};
+		struct { /* ice specific values */
+			ci_rx_release_mbufs_t rx_rel_mbufs; /**< release mbuf function */
+			/** holds buffer split information */
+			struct rte_eth_rxseg_split rxseg[CI_RX_MAX_NSEG];
+			struct ci_rx_entry *sw_split_buf; /**< Buffer split SW ring */
+			uint32_t rxseg_nb; /**< number of buffer split segments */
+			uint32_t time_high; /* high 32 bits of hardware timestamp register */
+			uint32_t hw_time_high; /* high 32 bits of timestamp */
+			uint32_t hw_time_low; /* low 32 bits of timestamp */
+			bool ts_enable; /* if rxq timestamp is enabled */
+		};
 	};
 };
 
diff --git a/drivers/net/intel/ice/ice_dcf.c b/drivers/net/intel/ice/ice_dcf.c
index 65c18921f4..fddf5bbde5 100644
--- a/drivers/net/intel/ice/ice_dcf.c
+++ b/drivers/net/intel/ice/ice_dcf.c
@@ -1175,8 +1175,8 @@ ice_dcf_init_rss(struct ice_dcf_hw *hw)
 int
 ice_dcf_configure_queues(struct ice_dcf_hw *hw)
 {
-	struct ice_rx_queue **rxq =
-		(struct ice_rx_queue **)hw->eth_dev->data->rx_queues;
+	struct ci_rx_queue **rxq =
+		(struct ci_rx_queue **)hw->eth_dev->data->rx_queues;
 	struct ci_tx_queue **txq =
 		(struct ci_tx_queue **)hw->eth_dev->data->tx_queues;
 	struct virtchnl_vsi_queue_config_info *vc_config;
@@ -1211,7 +1211,7 @@ ice_dcf_configure_queues(struct ice_dcf_hw *hw)
 
 		vc_qp->rxq.max_pkt_size = rxq[i]->max_pkt_len;
 		vc_qp->rxq.ring_len = rxq[i]->nb_rx_desc;
-		vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_dma;
+		vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_phys_addr;
 		vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len;
 
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
diff --git a/drivers/net/intel/ice/ice_dcf_ethdev.c b/drivers/net/intel/ice/ice_dcf_ethdev.c
index efff76afa8..53272ddd60 100644
--- a/drivers/net/intel/ice/ice_dcf_ethdev.c
+++ b/drivers/net/intel/ice/ice_dcf_ethdev.c
@@ -106,7 +106,7 @@ ice_dcf_xmit_pkts(__rte_unused void *tx_queue,
 }
 
 static int
-ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ice_rx_queue *rxq)
+ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ci_rx_queue *rxq)
 {
 	struct ice_dcf_adapter *dcf_ad = dev->data->dev_private;
 	struct rte_eth_dev_data *dev_data = dev->data;
@@ -145,8 +145,8 @@ ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ice_rx_queue *rxq)
 static int
 ice_dcf_init_rx_queues(struct rte_eth_dev *dev)
 {
-	struct ice_rx_queue **rxq =
-		(struct ice_rx_queue **)dev->data->rx_queues;
+	struct ci_rx_queue **rxq =
+		(struct ci_rx_queue **)dev->data->rx_queues;
 	int i, ret;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -282,7 +282,7 @@ ice_dcf_config_rx_queues_irqs(struct rte_eth_dev *dev,
 }
 
 static int
-alloc_rxq_mbufs(struct ice_rx_queue *rxq)
+alloc_rxq_mbufs(struct ci_rx_queue *rxq)
 {
 	volatile union ice_rx_flex_desc *rxd;
 	struct rte_mbuf *mbuf = NULL;
@@ -305,7 +305,7 @@ alloc_rxq_mbufs(struct ice_rx_queue *rxq)
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
-		rxd = &rxq->rx_ring[i];
+		rxd = ICE_RX_RING_PTR(rxq, i);
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
@@ -324,7 +324,7 @@ ice_dcf_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ice_dcf_adapter *ad = dev->data->dev_private;
 	struct iavf_hw *hw = &ad->real_hw.avf;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err = 0;
 
 	if (rx_queue_id >= dev->data->nb_rx_queues)
@@ -358,7 +358,7 @@ ice_dcf_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 }
 
 static inline void
-reset_rx_queue(struct ice_rx_queue *rxq)
+reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	uint16_t len;
 	uint32_t i;
@@ -369,7 +369,7 @@ reset_rx_queue(struct ice_rx_queue *rxq)
 	len = rxq->nb_rx_desc + ICE_RX_MAX_BURST;
 
 	for (i = 0; i < len * sizeof(union ice_rx_flex_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)ICE_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 
@@ -429,7 +429,7 @@ ice_dcf_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct ice_dcf_adapter *ad = dev->data->dev_private;
 	struct ice_dcf_hw *hw = &ad->real_hw;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 
 	if (rx_queue_id >= dev->data->nb_rx_queues)
@@ -511,7 +511,7 @@ ice_dcf_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 static int
 ice_dcf_start_queues(struct rte_eth_dev *dev)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int nb_rxq = 0;
 	int nb_txq, i;
@@ -638,7 +638,7 @@ ice_dcf_stop_queues(struct rte_eth_dev *dev)
 {
 	struct ice_dcf_adapter *ad = dev->data->dev_private;
 	struct ice_dcf_hw *hw = &ad->real_hw;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int ret, i;
 
diff --git a/drivers/net/intel/ice/ice_ethdev.c b/drivers/net/intel/ice/ice_ethdev.c
index 2e163d706c..65cf586502 100644
--- a/drivers/net/intel/ice/ice_ethdev.c
+++ b/drivers/net/intel/ice/ice_ethdev.c
@@ -6690,7 +6690,7 @@ ice_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct ice_adapter *ad =
 			ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint32_t ts_high;
 	uint64_t ts_ns;
 
diff --git a/drivers/net/intel/ice/ice_ethdev.h b/drivers/net/intel/ice/ice_ethdev.h
index afe8dae497..0ed223d83e 100644
--- a/drivers/net/intel/ice/ice_ethdev.h
+++ b/drivers/net/intel/ice/ice_ethdev.h
@@ -257,9 +257,6 @@ struct ice_vsi_list {
 	struct ice_vsi *vsi;
 };
 
-struct ice_rx_queue;
-struct ci_tx_queue;
-
 /**
  * Structure that defines a VSI, associated with a adapter.
  */
@@ -409,7 +406,7 @@ struct ice_fdir_counter_pool_container {
 struct ice_fdir_info {
 	struct ice_vsi *fdir_vsi;     /* pointer to fdir VSI structure */
 	struct ci_tx_queue *txq;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	void *prg_pkt;                 /* memory for fdir program packet */
 	uint64_t dma_addr;             /* physic address of packet memory*/
 	const struct rte_memzone *mz;
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 40ac01e782..4749ee729f 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -37,11 +37,11 @@ int
 ice_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = ICE_RX_RING_PTR(rxq, desc);
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.status_error0;
 
@@ -73,7 +73,7 @@ ice_proto_xtr_type_to_rxdid(uint8_t xtr_type)
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ci_rx_queue *rxq,
 				       struct rte_mbuf *mb,
 				       volatile union ice_rx_flex_desc *rxdp)
 {
@@ -95,7 +95,7 @@ ice_rxd_to_pkt_fields_by_comms_generic(__rte_unused struct ice_rx_queue *rxq,
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ci_rx_queue *rxq,
 				   struct rte_mbuf *mb,
 				   volatile union ice_rx_flex_desc *rxdp)
 {
@@ -120,7 +120,7 @@ ice_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ice_rx_queue *rxq,
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ci_rx_queue *rxq,
 				      struct rte_mbuf *mb,
 				      volatile union ice_rx_flex_desc *rxdp)
 {
@@ -164,7 +164,7 @@ ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ice_rx_queue *rxq,
 }
 
 static inline void
-ice_rxd_to_pkt_fields_by_comms_aux_v2(struct ice_rx_queue *rxq,
+ice_rxd_to_pkt_fields_by_comms_aux_v2(struct ci_rx_queue *rxq,
 				      struct rte_mbuf *mb,
 				      volatile union ice_rx_flex_desc *rxdp)
 {
@@ -215,7 +215,7 @@ static const ice_rxd_to_pkt_fields_t rxd_to_pkt_fields_ops[] = {
 };
 
 void
-ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq, uint32_t rxdid)
+ice_select_rxd_to_pkt_fields_handler(struct ci_rx_queue *rxq, uint32_t rxdid)
 {
 	rxq->rxdid = rxdid;
 
@@ -243,17 +243,17 @@ ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq, uint32_t rxdid)
 }
 
 static int
-ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
+ice_program_hw_rx_queue(struct ci_rx_queue *rxq)
 {
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
 	struct ice_pf *pf = ICE_VSI_TO_PF(vsi);
-	struct rte_eth_dev_data *dev_data = rxq->vsi->adapter->pf.dev_data;
+	struct rte_eth_dev_data *dev_data = rxq->ice_vsi->adapter->pf.dev_data;
 	struct ice_rlan_ctx rx_ctx;
 	uint16_t buf_size;
 	uint32_t rxdid = ICE_RXDID_COMMS_OVS;
 	uint32_t regval;
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 	uint32_t frame_size = dev_data->mtu + ICE_ETH_OVERHEAD;
 	int err;
 
@@ -371,7 +371,7 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
 		rx_ctx.dtype = 0; /* No Protocol Based Buffer Split mode */
 	}
 
-	rx_ctx.base = rxq->rx_ring_dma / ICE_QUEUE_BASE_ADDR_UNIT;
+	rx_ctx.base = rxq->rx_ring_phys_addr / ICE_QUEUE_BASE_ADDR_UNIT;
 	rx_ctx.qlen = rxq->nb_rx_desc;
 	rx_ctx.dbuf = rxq->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
 	rx_ctx.hbuf = rxq->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S;
@@ -452,15 +452,15 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
 
 /* Allocate mbufs for all descriptors in rx queue */
 static int
-ice_alloc_rx_queue_mbufs(struct ice_rx_queue *rxq)
+ice_alloc_rx_queue_mbufs(struct ci_rx_queue *rxq)
 {
-	struct ice_rx_entry *rxe = rxq->sw_ring;
+	struct ci_rx_entry *rxe = rxq->sw_ring;
 	uint64_t dma_addr;
 	uint16_t i;
 
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
 		volatile union ice_rx_flex_desc *rxd;
-		rxd = &rxq->rx_ring[i];
+		rxd = ICE_RX_RING_PTR(rxq, i);
 		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mp);
 
 		if (unlikely(!mbuf)) {
@@ -514,7 +514,7 @@ ice_alloc_rx_queue_mbufs(struct ice_rx_queue *rxq)
 
 /* Free all mbufs for descriptors in rx queue */
 static void
-_ice_rx_queue_release_mbufs(struct ice_rx_queue *rxq)
+_ice_rx_queue_release_mbufs(struct ci_rx_queue *rxq)
 {
 	uint16_t i;
 
@@ -591,7 +591,7 @@ ice_switch_rx_queue(struct ice_hw *hw, uint16_t q_idx, bool on)
 }
 
 static inline int
-ice_check_rx_burst_bulk_alloc_preconditions(struct ice_rx_queue *rxq)
+ice_check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
 {
 	int ret = 0;
 
@@ -618,9 +618,9 @@ ice_check_rx_burst_bulk_alloc_preconditions(struct ice_rx_queue *rxq)
 	return ret;
 }
 
-/* reset fields in ice_rx_queue back to default */
+/* reset fields in ci_rx_queue back to default */
 static void
-ice_reset_rx_queue(struct ice_rx_queue *rxq)
+ice_reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	unsigned int i;
 	uint16_t len;
@@ -633,7 +633,7 @@ ice_reset_rx_queue(struct ice_rx_queue *rxq)
 	len = (uint16_t)(rxq->nb_rx_desc + ICE_RX_MAX_BURST);
 
 	for (i = 0; i < len * sizeof(union ice_rx_flex_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)ICE_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 	for (i = 0; i < ICE_RX_MAX_BURST; ++i)
@@ -655,7 +655,7 @@ ice_reset_rx_queue(struct ice_rx_queue *rxq)
 int
 ice_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -715,7 +715,7 @@ ice_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 int
 ice_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -834,9 +834,9 @@ ice_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 }
 
 static int
-ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
+ice_fdir_program_hw_rx_queue(struct ci_rx_queue *rxq)
 {
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
 	uint32_t rxdid = ICE_RXDID_LEGACY_1;
 	struct ice_rlan_ctx rx_ctx;
@@ -848,7 +848,7 @@ ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
 
 	memset(&rx_ctx, 0, sizeof(rx_ctx));
 
-	rx_ctx.base = rxq->rx_ring_dma / ICE_QUEUE_BASE_ADDR_UNIT;
+	rx_ctx.base = rxq->rx_ring_phys_addr / ICE_QUEUE_BASE_ADDR_UNIT;
 	rx_ctx.qlen = rxq->nb_rx_desc;
 	rx_ctx.dbuf = rxq->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
 	rx_ctx.hbuf = rxq->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S;
@@ -909,7 +909,7 @@ ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq)
 int
 ice_fdir_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
@@ -1099,7 +1099,7 @@ ice_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 int
 ice_fdir_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
@@ -1170,7 +1170,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 	struct ice_adapter *ad =
 		ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct ice_vsi *vsi = pf->main_vsi;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz;
 	uint32_t ring_size, tlen;
 	uint16_t len;
@@ -1206,7 +1206,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate the rx queue data structure */
 	rxq = rte_zmalloc_socket(NULL,
-				 sizeof(struct ice_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 socket_id);
 
@@ -1240,7 +1240,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 		rxq->crc_len = 0;
 
 	rxq->drop_en = rx_conf->rx_drop_en;
-	rxq->vsi = vsi;
+	rxq->ice_vsi = vsi;
 	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 	rxq->proto_xtr = pf->proto_xtr != NULL ?
 			 pf->proto_xtr[queue_idx] : PROTO_XTR_NONE;
@@ -1274,8 +1274,8 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 	/* Zero all the descriptors in the ring. */
 	memset(rz->addr, 0, ring_size);
 
-	rxq->rx_ring_dma = rz->iova;
-	rxq->rx_ring = rz->addr;
+	rxq->rx_ring_phys_addr = rz->iova;
+	ICE_RX_RING(rxq) = rz->addr;
 
 	/* always reserve more for bulk alloc */
 	len = (uint16_t)(nb_desc + ICE_RX_MAX_BURST);
@@ -1287,7 +1287,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 
 	/* Allocate the software ring. */
 	rxq->sw_ring = rte_zmalloc_socket(NULL,
-					  sizeof(struct ice_rx_entry) * tlen,
+					  sizeof(struct ci_rx_entry) * tlen,
 					  RTE_CACHE_LINE_SIZE,
 					  socket_id);
 	if (!rxq->sw_ring) {
@@ -1324,7 +1324,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,
 void
 ice_rx_queue_release(void *rxq)
 {
-	struct ice_rx_queue *q = (struct ice_rx_queue *)rxq;
+	struct ci_rx_queue *q = (struct ci_rx_queue *)rxq;
 
 	if (!q) {
 		PMD_DRV_LOG(DEBUG, "Pointer to rxq is NULL");
@@ -1548,7 +1548,7 @@ void
 ice_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 		 struct rte_eth_rxq_info *qinfo)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -1586,11 +1586,11 @@ ice_rx_queue_count(void *rx_queue)
 {
 #define ICE_RXQ_SCAN_INTERVAL 4
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	while ((desc < rxq->nb_rx_desc) &&
 	       rte_le_to_cpu_16(rxdp->wb.status_error0) &
 	       (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)) {
@@ -1602,8 +1602,8 @@ ice_rx_queue_count(void *rx_queue)
 		desc += ICE_RXQ_SCAN_INTERVAL;
 		rxdp += ICE_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
-				 desc - rxq->nb_rx_desc]);
+			rxdp = ICE_RX_RING_PTR(rxq,
+					rxq->rx_tail + desc - rxq->nb_rx_desc);
 	}
 
 	return desc;
@@ -1695,25 +1695,25 @@ ice_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union ice_rx_flex_desc *rxdp)
 #define ICE_PTP_TS_VALID 0x1
 
 static inline int
-ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
+ice_rx_scan_hw_ring(struct ci_rx_queue *rxq)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t stat_err0;
 	uint16_t pkt_len, hdr_len;
 	int32_t s[ICE_LOOK_AHEAD], nb_dd;
 	int32_t i, j, nb_rx = 0;
 	uint64_t pkt_flags = 0;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 #endif
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -1843,7 +1843,7 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 }
 
 static inline uint16_t
-ice_rx_fill_from_stage(struct ice_rx_queue *rxq,
+ice_rx_fill_from_stage(struct ci_rx_queue *rxq,
 		       struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts)
 {
@@ -1862,10 +1862,10 @@ ice_rx_fill_from_stage(struct ice_rx_queue *rxq,
 }
 
 static inline int
-ice_rx_alloc_bufs(struct ice_rx_queue *rxq)
+ice_rx_alloc_bufs(struct ci_rx_queue *rxq)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx, i;
 	uint64_t dma_addr;
@@ -1894,7 +1894,7 @@ ice_rx_alloc_bufs(struct ice_rx_queue *rxq)
 		}
 	}
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = ICE_RX_RING_PTR(rxq, alloc_idx);
 	for (i = 0; i < rxq->rx_free_thresh; i++) {
 		if (likely(i < (rxq->rx_free_thresh - 1)))
 			/* Prefetch next mbuf */
@@ -1933,7 +1933,7 @@ ice_rx_alloc_bufs(struct ice_rx_queue *rxq)
 static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = (struct ice_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	uint16_t nb_rx = 0;
 
 	if (!nb_pkts)
@@ -1951,7 +1951,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		if (ice_rx_alloc_bufs(rxq) != 0) {
 			uint16_t i, j;
 
-			rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed +=
+			rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed +=
 				rxq->rx_free_thresh;
 			PMD_RX_LOG(DEBUG, "Rx mbuf alloc failed for "
 				   "port_id=%u, queue_id=%u",
@@ -2006,12 +2006,12 @@ ice_recv_scattered_pkts(void *rx_queue,
 			struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
-	volatile union ice_rx_flex_desc *rx_ring = rxq->rx_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ice_rx_flex_desc *rx_ring = ICE_RX_RING(rxq);
 	volatile union ice_rx_flex_desc *rxdp;
 	union ice_rx_flex_desc rxd;
-	struct ice_rx_entry *sw_ring = rxq->sw_ring;
-	struct ice_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb; /* new allocated mbuf */
@@ -2023,13 +2023,13 @@ ice_recv_scattered_pkts(void *rx_queue,
 	uint16_t rx_stat_err0;
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -2050,7 +2050,7 @@ ice_recv_scattered_pkts(void *rx_queue,
 		/* allocate mbuf */
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
+			rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
 			break;
 		}
 		rxd = *rxdp; /* copy descriptor in ring to temp variable*/
@@ -2319,7 +2319,7 @@ int
 ice_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint32_t desc;
 
 	if (unlikely(offset >= rxq->nb_rx_desc))
@@ -2332,7 +2332,7 @@ ice_rx_descriptor_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = ICE_RX_RING_PTR(rxq, desc);
 	if (rte_le_to_cpu_16(rxdp->wb.status_error0) &
 	    (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S))
 		return RTE_ETH_RX_DESC_DONE;
@@ -2459,7 +2459,7 @@ ice_fdir_setup_tx_resources(struct ice_pf *pf)
 int
 ice_fdir_setup_rx_resources(struct ice_pf *pf)
 {
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *rz = NULL;
 	uint32_t ring_size;
 	struct rte_eth_dev *dev;
@@ -2473,7 +2473,7 @@ ice_fdir_setup_rx_resources(struct ice_pf *pf)
 
 	/* Allocate the RX queue data structure. */
 	rxq = rte_zmalloc_socket("ice fdir rx queue",
-				 sizeof(struct ice_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 SOCKET_ID_ANY);
 	if (!rxq) {
@@ -2499,12 +2499,12 @@ ice_fdir_setup_rx_resources(struct ice_pf *pf)
 	rxq->nb_rx_desc = ICE_FDIR_NUM_RX_DESC;
 	rxq->queue_id = ICE_FDIR_QUEUE_ID;
 	rxq->reg_idx = pf->fdir.fdir_vsi->base_queue;
-	rxq->vsi = pf->fdir.fdir_vsi;
+	rxq->ice_vsi = pf->fdir.fdir_vsi;
 
-	rxq->rx_ring_dma = rz->iova;
+	rxq->rx_ring_phys_addr = rz->iova;
 	memset(rz->addr, 0, ICE_FDIR_NUM_RX_DESC *
 	       sizeof(union ice_32byte_rx_desc));
-	rxq->rx_ring = (union ice_rx_flex_desc *)rz->addr;
+	ICE_RX_RING(rxq) = (union ice_rx_flex_desc *)rz->addr;
 
 	/*
 	 * Don't need to allocate software ring and reset for the fdir
@@ -2523,12 +2523,12 @@ ice_recv_pkts(void *rx_queue,
 	      struct rte_mbuf **rx_pkts,
 	      uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
-	volatile union ice_rx_flex_desc *rx_ring = rxq->rx_ring;
+	struct ci_rx_queue *rxq = rx_queue;
+	volatile union ice_rx_flex_desc *rx_ring = ICE_RX_RING(rxq);
 	volatile union ice_rx_flex_desc *rxdp;
 	union ice_rx_flex_desc rxd;
-	struct ice_rx_entry *sw_ring = rxq->sw_ring;
-	struct ice_rx_entry *rxe;
+	struct ci_rx_entry *sw_ring = rxq->sw_ring;
+	struct ci_rx_entry *rxe;
 	struct rte_mbuf *nmb; /* new allocated mbuf */
 	struct rte_mbuf *nmb_pay; /* new allocated payload mbuf */
 	struct rte_mbuf *rxm; /* pointer to store old mbuf in SW ring */
@@ -2540,13 +2540,13 @@ ice_recv_pkts(void *rx_queue,
 	uint16_t rx_stat_err0;
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	bool is_tsinit = false;
 	uint64_t ts_ns;
-	struct ice_vsi *vsi = rxq->vsi;
+	struct ice_vsi *vsi = rxq->ice_vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	struct ice_adapter *ad = rxq->vsi->adapter;
+	struct ice_adapter *ad = rxq->ice_vsi->adapter;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -2567,7 +2567,7 @@ ice_recv_pkts(void *rx_queue,
 		/* allocate header mbuf */
 		nmb = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!nmb)) {
-			rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
+			rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
 			break;
 		}
 
@@ -2594,7 +2594,7 @@ ice_recv_pkts(void *rx_queue,
 			/* allocate payload mbuf */
 			nmb_pay = rte_mbuf_raw_alloc(rxq->rxseg[1].mp);
 			if (unlikely(!nmb_pay)) {
-				rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
+				rxq->ice_vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++;
 				rxe->mbuf = NULL;
 				nb_hold--;
 				if (unlikely(rx_id == 0))
@@ -3472,7 +3472,7 @@ ice_set_rx_function(struct rte_eth_dev *dev)
 	struct ice_adapter *ad =
 		ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 #ifdef RTE_ARCH_X86
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int i;
 	int rx_check_ret = -1;
 
@@ -4634,7 +4634,7 @@ ice_set_default_ptype_table(struct rte_eth_dev *dev)
  * tx queue
  */
 static inline int
-ice_check_fdir_programming_status(struct ice_rx_queue *rxq)
+ice_check_fdir_programming_status(struct ci_rx_queue *rxq)
 {
 	volatile union ice_32byte_rx_desc *rxdp;
 	uint64_t qword1;
@@ -4644,7 +4644,7 @@ ice_check_fdir_programming_status(struct ice_rx_queue *rxq)
 	int ret = -EAGAIN;
 
 	rxdp = (volatile union ice_32byte_rx_desc *)
-		(&rxq->rx_ring[rxq->rx_tail]);
+			ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
 	rx_status = (qword1 & ICE_RXD_QW1_STATUS_M)
 			>> ICE_RXD_QW1_STATUS_S;
@@ -4689,7 +4689,7 @@ int
 ice_fdir_programming(struct ice_pf *pf, struct ice_fltr_desc *fdir_desc)
 {
 	struct ci_tx_queue *txq = pf->fdir.txq;
-	struct ice_rx_queue *rxq = pf->fdir.rxq;
+	struct ci_rx_queue *rxq = pf->fdir.rxq;
 	volatile struct ice_fltr_desc *fdirdp;
 	volatile struct ice_tx_desc *txdp;
 	uint32_t td_cmd;
diff --git a/drivers/net/intel/ice/ice_rxtx.h b/drivers/net/intel/ice/ice_rxtx.h
index 276d40b57f..1a39770d7d 100644
--- a/drivers/net/intel/ice/ice_rxtx.h
+++ b/drivers/net/intel/ice/ice_rxtx.h
@@ -5,6 +5,7 @@
 #ifndef _ICE_RXTX_H_
 #define _ICE_RXTX_H_
 
+#include "../common/rx.h"
 #include "../common/tx.h"
 #include "ice_ethdev.h"
 
@@ -14,21 +15,28 @@
 #define ICE_DMA_MEM_ALIGN    4096
 #define ICE_RING_BASE_ALIGN  128
 
-#define ICE_RX_MAX_BURST 32
+#define ICE_RX_MAX_BURST CI_RX_MAX_BURST
 #define ICE_TX_MAX_BURST 32
 
 /* Maximal number of segments to split. */
-#define ICE_RX_MAX_NSEG 2
+#define ICE_RX_MAX_NSEG CI_RX_MAX_NSEG
 
 #define ICE_CHK_Q_ENA_COUNT        100
 #define ICE_CHK_Q_ENA_INTERVAL_US  100
 
 #ifdef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 #define ice_rx_flex_desc ice_16b_rx_flex_desc
+#define ICE_RX_RING(rxq) \
+	((rxq)->ice_rx_16b_ring)
 #else
 #define ice_rx_flex_desc ice_32b_rx_flex_desc
+#define ICE_RX_RING(rxq) \
+	((rxq)->ice_rx_32b_ring)
 #endif
 
+#define ICE_RX_RING_PTR(rxq, entry) \
+	(ICE_RX_RING(rxq) + (entry))
+
 #define ICE_SUPPORT_CHAIN_NUM 5
 
 #define ICE_TD_CMD                      ICE_TX_DESC_CMD_EOP
@@ -78,74 +86,16 @@ extern int ice_timestamp_dynfield_offset;
 
 #define ICE_TX_MTU_SEG_MAX	8
 
-typedef void (*ice_rx_release_mbufs_t)(struct ice_rx_queue *rxq);
-typedef void (*ice_rxd_to_pkt_fields_t)(struct ice_rx_queue *rxq,
+typedef void (*ice_rxd_to_pkt_fields_t)(struct ci_rx_queue *rxq,
 					struct rte_mbuf *mb,
 					volatile union ice_rx_flex_desc *rxdp);
 
-struct ice_rx_entry {
-	struct rte_mbuf *mbuf;
-};
-
 enum ice_rx_dtype {
 	ICE_RX_DTYPE_NO_SPLIT       = 0,
 	ICE_RX_DTYPE_HEADER_SPLIT   = 1,
 	ICE_RX_DTYPE_SPLIT_ALWAYS   = 2,
 };
 
-struct ice_rx_queue {
-	struct rte_mempool *mp; /* mbuf pool to populate RX ring */
-	volatile union ice_rx_flex_desc *rx_ring;/* RX ring virtual address */
-	rte_iova_t rx_ring_dma; /* RX ring DMA address */
-	struct ice_rx_entry *sw_ring; /* address of RX soft ring */
-	uint16_t nb_rx_desc; /* number of RX descriptors */
-	uint16_t rx_free_thresh; /* max free RX desc to hold */
-	uint16_t rx_tail; /* current value of tail */
-	uint16_t nb_rx_hold; /* number of held free RX desc */
-	struct rte_mbuf *pkt_first_seg; /**< first segment of current packet */
-	struct rte_mbuf *pkt_last_seg; /**< last segment of current packet */
-	uint16_t rx_nb_avail; /**< number of staged packets ready */
-	uint16_t rx_next_avail; /**< index of next staged packets */
-	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
-	struct rte_mbuf fake_mbuf; /**< dummy mbuf */
-	struct rte_mbuf *rx_stage[ICE_RX_MAX_BURST * 2];
-
-	uint16_t rxrearm_nb;	/**< number of remaining to be re-armed */
-	uint16_t rxrearm_start;	/**< the idx we start the re-arming from */
-	uint64_t mbuf_initializer; /**< value to init mbufs */
-
-	uint16_t port_id; /* device port ID */
-	uint8_t crc_len; /* 0 if CRC stripped, 4 otherwise */
-	uint8_t fdir_enabled; /* 0 if FDIR disabled, 1 when enabled */
-	uint16_t queue_id; /* RX queue index */
-	uint16_t reg_idx; /* RX queue register index */
-	uint8_t drop_en; /* if not 0, set register bit */
-	volatile uint8_t *qrx_tail; /* register address of tail */
-	struct ice_vsi *vsi; /* the VSI this queue belongs to */
-	uint16_t rx_buf_len; /* The packet buffer size */
-	uint16_t rx_hdr_len; /* The header buffer size */
-	uint16_t max_pkt_len; /* Maximum packet length */
-	bool q_set; /* indicate if rx queue has been configured */
-	bool rx_deferred_start; /* don't start this queue in dev start */
-	uint8_t proto_xtr; /* Protocol extraction from flexible descriptor */
-	int xtr_field_offs; /*Protocol extraction matedata offset*/
-	uint64_t xtr_ol_flag; /* Protocol extraction offload flag */
-	uint32_t rxdid; /* Receive Flex Descriptor profile ID */
-	ice_rx_release_mbufs_t rx_rel_mbufs;
-	uint64_t offloads;
-	uint32_t time_high;
-	uint32_t hw_register_set;
-	const struct rte_memzone *mz;
-	uint32_t hw_time_high; /* high 32 bits of timestamp */
-	uint32_t hw_time_low; /* low 32 bits of timestamp */
-	uint64_t hw_time_update; /* SW time of HW record updating */
-	struct ice_rx_entry *sw_split_buf;
-	/* address of temp buffer for RX split mbufs */
-	struct rte_eth_rxseg_split rxseg[ICE_RX_MAX_NSEG];
-	uint32_t rxseg_nb;
-	bool ts_enable; /* if rxq timestamp is enabled */
-};
-
 /* Offload features */
 union ice_tx_offload {
 	uint64_t data;
@@ -249,12 +199,12 @@ int ice_tx_descriptor_status(void *tx_queue, uint16_t offset);
 void ice_set_default_ptype_table(struct rte_eth_dev *dev);
 const uint32_t *ice_dev_supported_ptypes_get(struct rte_eth_dev *dev,
 					     size_t *no_of_elements);
-void ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq,
+void ice_select_rxd_to_pkt_fields_handler(struct ci_rx_queue *rxq,
 					  uint32_t rxdid);
 
 int ice_rx_vec_dev_check(struct rte_eth_dev *dev);
 int ice_tx_vec_dev_check(struct rte_eth_dev *dev);
-int ice_rxq_vec_setup(struct ice_rx_queue *rxq);
+int ice_rxq_vec_setup(struct ci_rx_queue *rxq);
 int ice_txq_vec_setup(struct ci_tx_queue *txq);
 uint16_t ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   uint16_t nb_pkts);
@@ -299,7 +249,7 @@ int ice_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc);
 #define FDIR_PARSING_ENABLE_PER_QUEUE(ad, on) do { \
 	int i; \
 	for (i = 0; i < (ad)->pf.dev_data->nb_rx_queues; i++) { \
-		struct ice_rx_queue *rxq = (ad)->pf.dev_data->rx_queues[i]; \
+		struct ci_rx_queue *rxq = (ad)->pf.dev_data->rx_queues[i]; \
 		if (!rxq) \
 			continue; \
 		rxq->fdir_enabled = on; \
diff --git a/drivers/net/intel/ice/ice_rxtx_common_avx.h b/drivers/net/intel/ice/ice_rxtx_common_avx.h
index c62e60c70e..7209c902db 100644
--- a/drivers/net/intel/ice/ice_rxtx_common_avx.h
+++ b/drivers/net/intel/ice/ice_rxtx_common_avx.h
@@ -9,14 +9,14 @@
 
 #ifdef __AVX2__
 static __rte_always_inline void
-ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
+ice_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
index 0c54b325c6..f4555369a2 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
@@ -8,7 +8,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-ice_rxq_rearm(struct ice_rx_queue *rxq)
+ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	ice_rxq_rearm_common(rxq, false);
 }
@@ -33,17 +33,17 @@ ice_flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_ice_recv_raw_pkts_vec_avx2(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts, uint8_t *split_packet,
 			    bool offload)
 {
 #define ICE_DESCS_PER_LOOP_AVX 8
 
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct ice_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union ice_rx_flex_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ice_rx_flex_desc *rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 
 	rte_prefetch0(rxdp);
@@ -445,7 +445,7 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			 * needs to load 2nd 16B of each desc for RSS hash parsing,
 			 * will cause performance drop to get into this context.
 			 */
-			if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+			if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
 					RTE_ETH_RX_OFFLOAD_RSS_HASH) {
 				/* load bottom half of every 32B desc */
 				const __m128i raw_desc_bh7 = _mm_load_si128
@@ -694,7 +694,7 @@ static __rte_always_inline uint16_t
 ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				  uint16_t nb_pkts, bool offload)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
index bd49be07c9..6eea74d703 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
@@ -10,7 +10,7 @@
 #define ICE_DESCS_PER_LOOP_AVX 8
 
 static __rte_always_inline void
-ice_rxq_rearm(struct ice_rx_queue *rxq)
+ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	ice_rxq_rearm_common(rxq, true);
 }
@@ -35,17 +35,17 @@ ice_flex_rxd_to_fdir_flags_vec_avx512(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
+_ice_recv_raw_pkts_vec_avx512(struct ci_rx_queue *rxq,
 			      struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts,
 			      uint8_t *split_packet,
 			      bool do_offload)
 {
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct ice_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union ice_rx_flex_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union ice_rx_flex_desc *rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -467,7 +467,7 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
 			 * needs to load 2nd 16B of each desc for RSS hash parsing,
 			 * will cause performance drop to get into this context.
 			 */
-			if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+			if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
 					RTE_ETH_RX_OFFLOAD_RSS_HASH) {
 				/* load bottom half of every 32B desc */
 				const __m128i raw_desc_bh7 = _mm_load_si128
@@ -723,7 +723,7 @@ static uint16_t
 ice_recv_scattered_burst_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				    uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -765,7 +765,7 @@ ice_recv_scattered_burst_vec_avx512_offload(void *rx_queue,
 					    struct rte_mbuf **rx_pkts,
 					    uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_common.h b/drivers/net/intel/ice/ice_rxtx_vec_common.h
index 7933c26366..9430a99ba5 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_common.h
+++ b/drivers/net/intel/ice/ice_rxtx_vec_common.h
@@ -17,7 +17,7 @@ ice_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
 }
 
 static inline void
-_ice_rx_queue_release_mbufs_vec(struct ice_rx_queue *rxq)
+_ice_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	const unsigned int mask = rxq->nb_rx_desc - 1;
 	unsigned int i;
@@ -79,7 +79,7 @@ _ice_rx_queue_release_mbufs_vec(struct ice_rx_queue *rxq)
 #define ICE_VECTOR_OFFLOAD_PATH	1
 
 static inline int
-ice_rx_vec_queue_default(struct ice_rx_queue *rxq)
+ice_rx_vec_queue_default(struct ci_rx_queue *rxq)
 {
 	if (!rxq)
 		return -1;
@@ -119,7 +119,7 @@ static inline int
 ice_rx_vec_dev_check_default(struct rte_eth_dev *dev)
 {
 	int i;
-	struct ice_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int ret = 0;
 	int result = 0;
 
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_sse.c b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
index 97f05ba45e..dc9d37226a 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
@@ -26,18 +26,18 @@ ice_flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 }
 
 static inline void
-ice_rxq_rearm(struct ice_rx_queue *rxq)
+ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 					  RTE_PKTMBUF_HEADROOM);
 	__m128i dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -105,7 +105,7 @@ ice_rxq_rearm(struct ice_rx_queue *rxq)
 }
 
 static inline void
-ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
+ice_rx_desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4],
 			 struct rte_mbuf **rx_pkts)
 {
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -301,15 +301,15 @@ ice_rx_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a ICE_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_ice_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union ice_rx_flex_desc *rxdp;
-	struct ice_rx_entry *sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
 	__m128i crc_adjust = _mm_set_epi16
 				(0, 0, 0,       /* ignore non-length fields */
 				 -rxq->crc_len, /* sub crc on data_len */
@@ -361,7 +361,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = ICE_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -482,7 +482,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		 * needs to load 2nd 16B of each desc for RSS hash parsing,
 		 * will cause performance drop to get into this context.
 		 */
-		if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+		if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
 				RTE_ETH_RX_OFFLOAD_RSS_HASH) {
 			/* load bottom half of every 32B desc */
 			const __m128i raw_desc_bh3 =
@@ -608,7 +608,7 @@ static uint16_t
 ice_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts)
 {
-	struct ice_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
 	/* get some new buffers */
@@ -779,7 +779,7 @@ ice_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 int __rte_cold
-ice_rxq_vec_setup(struct ice_rx_queue *rxq)
+ice_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	if (!rxq)
 		return -1;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v1 06/13] net/iavf: use the common Rx queue structure
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (3 preceding siblings ...)
  2025-05-06 13:27 ` [PATCH v1 05/13] net/ice: " Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Bruce Richardson, Vladimir Medvedkin, Ian Stokes

Make the iavf driver use the new common Rx queue structure.

Because the iavf driver supports both 16-byte and 32-byte descriptor
formats (controlled by the RTE_LIBRTE_IAVF_16BYTE_RX_DESC define), the
common queue structure has to take that into account, so it carries ring
pointers for both formats, while the actual descriptor format is selected
by iavf at compile time using the above macro. Direct access to the Rx
ring is now replaced with macro-based access that takes the descriptor
size into account.
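
As an illustration, the access pattern introduced here looks roughly like
the following sketch (simplified from the definitions added below, not a
verbatim copy):

    /* common queue keeps one ring pointer per supported descriptor size */
    struct ci_rx_queue {
        union {
            volatile union iavf_16byte_rx_desc *iavf_rx_16b_ring;
            volatile union iavf_32byte_rx_desc *iavf_rx_32b_ring;
        };
        uint16_t rx_tail;
        /* ... */
    };

    /* the driver selects the active union member at compile time */
    #ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
    #define IAVF_RX_RING(rxq)            ((rxq)->iavf_rx_16b_ring)
    #else
    #define IAVF_RX_RING(rxq)            ((rxq)->iavf_rx_32b_ring)
    #endif
    #define IAVF_RX_RING_PTR(rxq, entry) (IAVF_RX_RING(rxq) + (entry))

    /* code that used to dereference rxq->rx_ring now does, e.g.: */
    rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);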

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h                 |  12 ++
 drivers/net/intel/iavf/iavf.h                 |   4 +-
 drivers/net/intel/iavf/iavf_ethdev.c          |  12 +-
 drivers/net/intel/iavf/iavf_rxtx.c            | 192 +++++++++---------
 drivers/net/intel/iavf/iavf_rxtx.h            |  76 ++-----
 drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   |  24 +--
 drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c |  22 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_common.h |  27 ++-
 drivers/net/intel/iavf/iavf_rxtx_vec_neon.c   |  12 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    |  46 ++---
 drivers/net/intel/iavf/iavf_vchnl.c           |   6 +-
 11 files changed, 198 insertions(+), 235 deletions(-)

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index b4836e7914..507235f4c6 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -37,6 +37,8 @@ struct ci_rx_queue {
 		volatile union i40e_32byte_rx_desc *i40e_rx_32b_ring;
 		volatile union ice_16b_rx_flex_desc *ice_rx_16b_ring;
 		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
+		volatile union iavf_16byte_rx_desc *iavf_rx_16b_ring;
+		volatile union iavf_32byte_rx_desc *iavf_rx_32b_ring;
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
@@ -79,6 +81,7 @@ struct ci_rx_queue {
 	union { /* the VSI this queue belongs to */
 		struct i40e_vsi *i40e_vsi;
 		struct ice_vsi *ice_vsi;
+		struct iavf_vsi *iavf_vsi;
 	};
 	const struct rte_memzone *mz;
 	union {
@@ -107,6 +110,15 @@ struct ci_rx_queue {
 			uint32_t hw_time_low; /* low 32 bits of timestamp */
 			bool ts_enable; /* if rxq timestamp is enabled */
 		};
+		struct { /* iavf specific values */
+			const struct iavf_rxq_ops *ops; /**< queue ops */
+			struct iavf_rx_queue_stats *stats; /**< per-queue stats */
+			uint64_t phc_time; /**< HW timestamp */
+			uint8_t rel_mbufs_type; /**< type of release mbuf function */
+			uint8_t rx_flags; /**< Rx VLAN tag location flags */
+#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG1     BIT(0)
+#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2   BIT(1)
+		};
 	};
 };
 
diff --git a/drivers/net/intel/iavf/iavf.h b/drivers/net/intel/iavf/iavf.h
index 97e6b243fb..f81c939c96 100644
--- a/drivers/net/intel/iavf/iavf.h
+++ b/drivers/net/intel/iavf/iavf.h
@@ -97,7 +97,7 @@
 #define IAVF_L2TPV2_FLAGS_LEN	0x4000
 
 struct iavf_adapter;
-struct iavf_rx_queue;
+struct ci_rx_queue;
 struct ci_tx_queue;
 
 
@@ -555,7 +555,7 @@ int iavf_ipsec_crypto_request(struct iavf_adapter *adapter,
 		uint8_t *resp_msg, size_t resp_msg_len);
 extern const struct rte_tm_ops iavf_tm_ops;
 int iavf_get_ptp_cap(struct iavf_adapter *adapter);
-int iavf_get_phc_time(struct iavf_rx_queue *rxq);
+int iavf_get_phc_time(struct ci_rx_queue *rxq);
 int iavf_flow_sub(struct iavf_adapter *adapter,
 		  struct iavf_fsub_conf *filter);
 int iavf_flow_unsub(struct iavf_adapter *adapter,
diff --git a/drivers/net/intel/iavf/iavf_ethdev.c b/drivers/net/intel/iavf/iavf_ethdev.c
index 5babd587b3..4e843a3532 100644
--- a/drivers/net/intel/iavf/iavf_ethdev.c
+++ b/drivers/net/intel/iavf/iavf_ethdev.c
@@ -728,7 +728,7 @@ iavf_dev_configure(struct rte_eth_dev *dev)
 }
 
 static int
-iavf_init_rxq(struct rte_eth_dev *dev, struct iavf_rx_queue *rxq)
+iavf_init_rxq(struct rte_eth_dev *dev, struct ci_rx_queue *rxq)
 {
 	struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct rte_eth_dev_data *dev_data = dev->data;
@@ -779,8 +779,8 @@ iavf_init_rxq(struct rte_eth_dev *dev, struct iavf_rx_queue *rxq)
 static int
 iavf_init_queues(struct rte_eth_dev *dev)
 {
-	struct iavf_rx_queue **rxq =
-		(struct iavf_rx_queue **)dev->data->rx_queues;
+	struct ci_rx_queue **rxq =
+		(struct ci_rx_queue **)dev->data->rx_queues;
 	int i, ret = IAVF_SUCCESS;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -955,7 +955,7 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev,
 static int
 iavf_start_queues(struct rte_eth_dev *dev)
 {
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int i;
 	uint16_t nb_txq, nb_rxq;
@@ -1867,9 +1867,9 @@ iavf_dev_update_ipsec_xstats(struct rte_eth_dev *ethdev,
 {
 	uint16_t idx;
 	for (idx = 0; idx < ethdev->data->nb_rx_queues; idx++) {
-		struct iavf_rx_queue *rxq;
+		struct ci_rx_queue *rxq;
 		struct iavf_ipsec_crypto_stats *stats;
-		rxq = (struct iavf_rx_queue *)ethdev->data->rx_queues[idx];
+		rxq = (struct ci_rx_queue *)ethdev->data->rx_queues[idx];
 		stats = &rxq->stats->ipsec_crypto;
 		ips->icount += stats->icount;
 		ips->ibytes += stats->ibytes;
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index d23d2df807..a9ce4b55d9 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -128,12 +128,12 @@ iavf_monitor_callback(const uint64_t value,
 int
 iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile union iavf_rx_desc *rxdp;
 	uint16_t desc;
 
 	desc = rxq->rx_tail;
-	rxdp = &rxq->rx_ring[desc];
+	rxdp = IAVF_RX_RING_PTR(rxq, desc);
 	/* watch for changes in status bit */
 	pmc->addr = &rxdp->wb.qword1.status_error_len;
 
@@ -222,7 +222,7 @@ check_tx_vec_allow(struct ci_tx_queue *txq)
 }
 
 static inline bool
-check_rx_bulk_allow(struct iavf_rx_queue *rxq)
+check_rx_bulk_allow(struct ci_rx_queue *rxq)
 {
 	int ret = true;
 
@@ -243,7 +243,7 @@ check_rx_bulk_allow(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-reset_rx_queue(struct iavf_rx_queue *rxq)
+reset_rx_queue(struct ci_rx_queue *rxq)
 {
 	uint16_t len;
 	uint32_t i;
@@ -254,12 +254,12 @@ reset_rx_queue(struct iavf_rx_queue *rxq)
 	len = rxq->nb_rx_desc + IAVF_RX_MAX_BURST;
 
 	for (i = 0; i < len * sizeof(union iavf_rx_desc); i++)
-		((volatile char *)rxq->rx_ring)[i] = 0;
+		((volatile char *)IAVF_RX_RING(rxq))[i] = 0;
 
 	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
 
 	for (i = 0; i < IAVF_RX_MAX_BURST; i++)
-		rxq->sw_ring[rxq->nb_rx_desc + i] = &rxq->fake_mbuf;
+		rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;
 
 	/* for rx bulk */
 	rxq->rx_nb_avail = 0;
@@ -315,7 +315,7 @@ reset_tx_queue(struct ci_tx_queue *txq)
 }
 
 static int
-alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
+alloc_rxq_mbufs(struct ci_rx_queue *rxq)
 {
 	volatile union iavf_rx_desc *rxd;
 	struct rte_mbuf *mbuf = NULL;
@@ -326,8 +326,8 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		mbuf = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(!mbuf)) {
 			for (j = 0; j < i; j++) {
-				rte_pktmbuf_free_seg(rxq->sw_ring[j]);
-				rxq->sw_ring[j] = NULL;
+				rte_pktmbuf_free_seg(rxq->sw_ring[j].mbuf);
+				rxq->sw_ring[j].mbuf = NULL;
 			}
 			PMD_DRV_LOG(ERR, "Failed to allocate mbuf for RX");
 			return -ENOMEM;
@@ -342,7 +342,7 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
-		rxd = &rxq->rx_ring[i];
+		rxd = IAVF_RX_RING_PTR(rxq, i);
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
@@ -350,14 +350,14 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		rxd->read.rsvd2 = 0;
 #endif
 
-		rxq->sw_ring[i] = mbuf;
+		rxq->sw_ring[i].mbuf = mbuf;
 	}
 
 	return 0;
 }
 
 static inline void
-release_rxq_mbufs(struct iavf_rx_queue *rxq)
+release_rxq_mbufs(struct ci_rx_queue *rxq)
 {
 	uint16_t i;
 
@@ -365,9 +365,9 @@ release_rxq_mbufs(struct iavf_rx_queue *rxq)
 		return;
 
 	for (i = 0; i < rxq->nb_rx_desc; i++) {
-		if (rxq->sw_ring[i]) {
-			rte_pktmbuf_free_seg(rxq->sw_ring[i]);
-			rxq->sw_ring[i] = NULL;
+		if (rxq->sw_ring[i].mbuf) {
+			rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
+			rxq->sw_ring[i].mbuf = NULL;
 		}
 	}
 
@@ -395,7 +395,7 @@ struct iavf_rxq_ops iavf_rxq_release_mbufs_ops[] = {
 };
 
 static inline void
-iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct iavf_rx_queue *rxq,
+iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct ci_rx_queue *rxq,
 				    struct rte_mbuf *mb,
 				    volatile union iavf_rx_flex_desc *rxdp)
 {
@@ -420,7 +420,7 @@ iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct iavf_rx_queue *rxq,
 }
 
 static inline void
-iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct iavf_rx_queue *rxq,
+iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct ci_rx_queue *rxq,
 				       struct rte_mbuf *mb,
 				       volatile union iavf_rx_flex_desc *rxdp)
 {
@@ -462,7 +462,7 @@ iavf_rxd_to_pkt_fields_by_comms_aux_v1(struct iavf_rx_queue *rxq,
 }
 
 static inline void
-iavf_rxd_to_pkt_fields_by_comms_aux_v2(struct iavf_rx_queue *rxq,
+iavf_rxd_to_pkt_fields_by_comms_aux_v2(struct ci_rx_queue *rxq,
 				       struct rte_mbuf *mb,
 				       volatile union iavf_rx_flex_desc *rxdp)
 {
@@ -517,7 +517,7 @@ iavf_rxd_to_pkt_fields_t rxd_to_pkt_fields_ops[IAVF_RXDID_LAST + 1] = {
 };
 
 static void
-iavf_select_rxd_to_pkt_fields_handler(struct iavf_rx_queue *rxq, uint32_t rxdid)
+iavf_select_rxd_to_pkt_fields_handler(struct ci_rx_queue *rxq, uint32_t rxdid)
 {
 	rxq->rxdid = rxdid;
 
@@ -572,7 +572,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	struct iavf_info *vf =
 		IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 	struct iavf_vsi *vsi = &vf->vsi;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	const struct rte_memzone *mz;
 	uint32_t ring_size;
 	uint8_t proto_xtr;
@@ -610,7 +610,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 
 	/* Allocate the rx queue data structure */
 	rxq = rte_zmalloc_socket("iavf rxq",
-				 sizeof(struct iavf_rx_queue),
+				 sizeof(struct ci_rx_queue),
 				 RTE_CACHE_LINE_SIZE,
 				 socket_id);
 	if (!rxq) {
@@ -668,7 +668,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	rxq->port_id = dev->data->port_id;
 	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 	rxq->rx_hdr_len = 0;
-	rxq->vsi = vsi;
+	rxq->iavf_vsi = vsi;
 	rxq->offloads = offloads;
 
 	if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
@@ -713,7 +713,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	/* Zero all the descriptors in the ring. */
 	memset(mz->addr, 0, ring_size);
 	rxq->rx_ring_phys_addr = mz->iova;
-	rxq->rx_ring = (union iavf_rx_desc *)mz->addr;
+	IAVF_RX_RING(rxq) = (union iavf_rx_desc *)mz->addr;
 
 	rxq->mz = mz;
 	reset_rx_queue(rxq);
@@ -905,7 +905,7 @@ iavf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 	struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err = 0;
 
 	PMD_DRV_FUNC_TRACE();
@@ -997,7 +997,7 @@ iavf_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	struct iavf_adapter *adapter =
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int err;
 
 	PMD_DRV_FUNC_TRACE();
@@ -1060,7 +1060,7 @@ iavf_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 void
 iavf_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 {
-	struct iavf_rx_queue *q = dev->data->rx_queues[qid];
+	struct ci_rx_queue *q = dev->data->rx_queues[qid];
 
 	if (!q)
 		return;
@@ -1089,7 +1089,7 @@ iavf_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 static void
 iavf_reset_queues(struct rte_eth_dev *dev)
 {
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	struct ci_tx_queue *txq;
 	int i;
 
@@ -1375,7 +1375,7 @@ iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
  * from the hardware point of view.
  */
 static inline void
-iavf_update_rx_tail(struct iavf_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
+iavf_update_rx_tail(struct ci_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
 {
 	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
 
@@ -1397,9 +1397,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	volatile union iavf_rx_desc *rx_ring;
 	volatile union iavf_rx_desc *rxdp;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	union iavf_rx_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_eth_dev *dev;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
@@ -1416,8 +1416,8 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
-	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	rx_ring = IAVF_RX_RING(rxq);
+	ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -1442,13 +1442,13 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (unlikely(rx_id == rxq->nb_rx_desc))
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1456,9 +1456,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 		rxdp->read.hdr_addr = 0;
@@ -1506,9 +1506,9 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 {
 	volatile union iavf_rx_desc *rx_ring;
 	volatile union iavf_rx_flex_desc *rxdp;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	union iavf_rx_flex_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_eth_dev *dev;
 	struct rte_mbuf *rxm;
 	struct rte_mbuf *nmb;
@@ -1525,8 +1525,8 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 	nb_hold = 0;
 	rxq = rx_queue;
 	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
-	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	rx_ring = IAVF_RX_RING(rxq);
+	ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -1559,13 +1559,13 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (unlikely(rx_id == rxq->nb_rx_desc))
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1573,9 +1573,9 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 		rxdp->read.hdr_addr = 0;
@@ -1629,9 +1629,9 @@ uint16_t
 iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 				  uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	union iavf_rx_flex_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb, *rxm;
@@ -1643,9 +1643,9 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint64_t pkt_flags;
 	uint64_t ts_ns;
 
-	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union iavf_rx_desc *rx_ring = IAVF_RX_RING(rxq);
 	volatile union iavf_rx_flex_desc *rxdp;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
 		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
@@ -1678,13 +1678,13 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (rx_id == rxq->nb_rx_desc)
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1692,10 +1692,10 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
 
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 
@@ -1806,9 +1806,9 @@ uint16_t
 iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	union iavf_rx_desc rxd;
-	struct rte_mbuf *rxe;
+	struct ci_rx_entry rxe;
 	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 	struct rte_mbuf *nmb, *rxm;
@@ -1820,9 +1820,9 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint64_t dma_addr;
 	uint64_t pkt_flags;
 
-	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union iavf_rx_desc *rx_ring = IAVF_RX_RING(rxq);
 	volatile union iavf_rx_desc *rxdp;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -1847,13 +1847,13 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rxd = *rxdp;
 		nb_hold++;
 		rxe = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
+		rxq->sw_ring[rx_id].mbuf = nmb;
 		rx_id++;
 		if (rx_id == rxq->nb_rx_desc)
 			rx_id = 0;
 
 		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
+		rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 
 		/* When next RX descriptor is on a cache line boundary,
 		 * prefetch the next 4 RX descriptors and next 8 pointers
@@ -1861,10 +1861,10 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		if ((rx_id & 0x3) == 0) {
 			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id].mbuf);
 		}
 
-		rxm = rxe;
+		rxm = rxe.mbuf;
 		dma_addr =
 			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 
@@ -1963,12 +1963,12 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 #define IAVF_LOOK_AHEAD 8
 static inline int
-iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
+iavf_rx_scan_hw_ring_flex_rxd(struct ci_rx_queue *rxq,
 			    struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts)
 {
 	volatile union iavf_rx_flex_desc *rxdp;
-	struct rte_mbuf **rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t stat_err0;
 	uint16_t pkt_len;
@@ -1976,10 +1976,10 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 	int32_t i, j, nb_rx = 0;
 	int32_t nb_staged = 0;
 	uint64_t pkt_flags;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 	uint64_t ts_ns;
 
-	rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail];
+	rxdp = (volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -2038,7 +2038,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 					  rxq->rx_tail +
 					  i * IAVF_LOOK_AHEAD + j);
 
-			mb = rxep[j];
+			mb = rxep[j].mbuf;
 			pkt_len = (rte_le_to_cpu_16(rxdp[j].wb.pkt_len) &
 				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
 			mb->data_len = pkt_len;
@@ -2072,11 +2072,11 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 
 			/* Put up to nb_pkts directly into buffers */
 			if ((i + j) < nb_pkts) {
-				rx_pkts[i + j] = rxep[j];
+				rx_pkts[i + j] = rxep[j].mbuf;
 				nb_rx++;
 			} else {
 				/* Stage excess pkts received */
-				rxq->rx_stage[nb_staged] = rxep[j];
+				rxq->rx_stage[nb_staged] = rxep[j].mbuf;
 				nb_staged++;
 			}
 		}
@@ -2090,16 +2090,16 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
 
 	/* Clear software ring entries */
 	for (i = 0; i < (nb_rx + nb_staged); i++)
-		rxq->sw_ring[rxq->rx_tail + i] = NULL;
+		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
 
 	return nb_rx;
 }
 
 static inline int
-iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+iavf_rx_scan_hw_ring(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t pkt_len;
 	uint64_t qword1;
@@ -2108,9 +2108,9 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 	int32_t i, j, nb_rx = 0;
 	int32_t nb_staged = 0;
 	uint64_t pkt_flags;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 	rxep = &rxq->sw_ring[rxq->rx_tail];
 
 	qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
@@ -2164,7 +2164,7 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 			IAVF_DUMP_RX_DESC(rxq, &rxdp[j],
 					 rxq->rx_tail + i * IAVF_LOOK_AHEAD + j);
 
-			mb = rxep[j];
+			mb = rxep[j].mbuf;
 			qword1 = rte_le_to_cpu_64
 					(rxdp[j].wb.qword1.status_error_len);
 			pkt_len = ((qword1 & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >>
@@ -2190,10 +2190,10 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 
 			/* Put up to nb_pkts directly into buffers */
 			if ((i + j) < nb_pkts) {
-				rx_pkts[i + j] = rxep[j];
+				rx_pkts[i + j] = rxep[j].mbuf;
 				nb_rx++;
 			} else { /* Stage excess pkts received */
-				rxq->rx_stage[nb_staged] = rxep[j];
+				rxq->rx_stage[nb_staged] = rxep[j].mbuf;
 				nb_staged++;
 			}
 		}
@@ -2207,13 +2207,13 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint1
 
 	/* Clear software ring entries */
 	for (i = 0; i < (nb_rx + nb_staged); i++)
-		rxq->sw_ring[rxq->rx_tail + i] = NULL;
+		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
 
 	return nb_rx;
 }
 
 static inline uint16_t
-iavf_rx_fill_from_stage(struct iavf_rx_queue *rxq,
+iavf_rx_fill_from_stage(struct ci_rx_queue *rxq,
 		       struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts)
 {
@@ -2232,10 +2232,10 @@ iavf_rx_fill_from_stage(struct iavf_rx_queue *rxq,
 }
 
 static inline int
-iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
+iavf_rx_alloc_bufs(struct ci_rx_queue *rxq)
 {
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxep;
+	struct ci_rx_entry *rxep;
 	struct rte_mbuf *mb;
 	uint16_t alloc_idx, i;
 	uint64_t dma_addr;
@@ -2252,13 +2252,13 @@ iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
 		return -ENOMEM;
 	}
 
-	rxdp = &rxq->rx_ring[alloc_idx];
+	rxdp = IAVF_RX_RING_PTR(rxq, alloc_idx);
 	for (i = 0; i < rxq->rx_free_thresh; i++) {
 		if (likely(i < (rxq->rx_free_thresh - 1)))
 			/* Prefetch next mbuf */
-			rte_prefetch0(rxep[i + 1]);
+			rte_prefetch0(rxep[i + 1].mbuf);
 
-		mb = rxep[i];
+		mb = rxep[i].mbuf;
 		rte_mbuf_refcnt_set(mb, 1);
 		mb->next = NULL;
 		mb->data_off = RTE_PKTMBUF_HEADROOM;
@@ -2284,7 +2284,7 @@ iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
 static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = (struct iavf_rx_queue *)rx_queue;
+	struct ci_rx_queue *rxq = (struct ci_rx_queue *)rx_queue;
 	uint16_t nb_rx = 0;
 
 	if (!nb_pkts)
@@ -2312,11 +2312,11 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 			rxq->rx_tail = (uint16_t)(rxq->rx_tail - (nb_rx + nb_staged));
 			for (i = 0, j = rxq->rx_tail; i < nb_rx; i++, j++) {
-				rxq->sw_ring[j] = rx_pkts[i];
+				rxq->sw_ring[j].mbuf = rx_pkts[i];
 				rx_pkts[i] = NULL;
 			}
 			for (i = 0, j = rxq->rx_tail + nb_rx; i < nb_staged; i++, j++) {
-				rxq->sw_ring[j] = rxq->rx_stage[i];
+				rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
 				rx_pkts[i] = NULL;
 			}
 
@@ -3843,13 +3843,13 @@ static uint16_t
 iavf_recv_pkts_no_poll(void *rx_queue, struct rte_mbuf **rx_pkts,
 				uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	enum iavf_rx_burst_type rx_burst_type;
 
-	if (!rxq->vsi || rxq->vsi->adapter->no_poll)
+	if (!rxq->iavf_vsi || rxq->iavf_vsi->adapter->no_poll)
 		return 0;
 
-	rx_burst_type = rxq->vsi->adapter->rx_burst_type;
+	rx_burst_type = rxq->iavf_vsi->adapter->rx_burst_type;
 
 	return iavf_rx_pkt_burst_ops[rx_burst_type].pkt_burst(rx_queue,
 								rx_pkts, nb_pkts);
@@ -3965,7 +3965,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	enum iavf_rx_burst_type rx_burst_type;
 	int no_poll_on_link_down = adapter->devargs.no_poll_on_link_down;
 	int i;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	bool use_flex = true;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -4379,7 +4379,7 @@ void
 iavf_dev_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 		     struct rte_eth_rxq_info *qinfo)
 {
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 
 	rxq = dev->data->rx_queues[queue_id];
 
@@ -4414,11 +4414,11 @@ iavf_dev_rxq_count(void *rx_queue)
 {
 #define IAVF_RXQ_SCAN_INTERVAL 4
 	volatile union iavf_rx_desc *rxdp;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	uint16_t desc = 0;
 
 	rxq = rx_queue;
-	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	while ((desc < rxq->nb_rx_desc) &&
 	       ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
@@ -4431,8 +4431,8 @@ iavf_dev_rxq_count(void *rx_queue)
 		desc += IAVF_RXQ_SCAN_INTERVAL;
 		rxdp += IAVF_RXQ_SCAN_INTERVAL;
 		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
-			rxdp = &(rxq->rx_ring[rxq->rx_tail +
-					desc - rxq->nb_rx_desc]);
+			rxdp = IAVF_RX_RING_PTR(rxq,
+					rxq->rx_tail + desc - rxq->nb_rx_desc);
 	}
 
 	return desc;
@@ -4441,7 +4441,7 @@ iavf_dev_rxq_count(void *rx_queue)
 int
 iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	volatile uint64_t *status;
 	uint64_t mask;
 	uint32_t desc;
@@ -4456,7 +4456,7 @@ iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset)
 	if (desc >= rxq->nb_rx_desc)
 		desc -= rxq->nb_rx_desc;
 
-	status = &rxq->rx_ring[desc].wb.qword1.status_error_len;
+	status = &IAVF_RX_RING_PTR(rxq, desc)->wb.qword1.status_error_len;
 	mask = rte_le_to_cpu_64((1ULL << IAVF_RX_DESC_STATUS_DD_SHIFT)
 		<< IAVF_RXD_QW1_STATUS_SHIFT);
 	if (*status & mask)
diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index 62b5a67c84..c43ddc3c2f 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -17,7 +17,7 @@
 #define IAVF_RING_BASE_ALIGN      128
 
 /* used for Rx Bulk Allocate */
-#define IAVF_RX_MAX_BURST         32
+#define IAVF_RX_MAX_BURST         CI_RX_MAX_BURST
 
 /* Max data buffer size must be 16K - 128 bytes */
 #define IAVF_RX_MAX_DATA_BUF_SIZE (16 * 1024 - 128)
@@ -198,17 +198,24 @@ union iavf_32b_rx_flex_desc {
 #ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 #define iavf_rx_desc iavf_16byte_rx_desc
 #define iavf_rx_flex_desc iavf_16b_rx_flex_desc
+#define IAVF_RX_RING(rxq) \
+	((rxq)->iavf_rx_16b_ring)
 #else
 #define iavf_rx_desc iavf_32byte_rx_desc
 #define iavf_rx_flex_desc iavf_32b_rx_flex_desc
+#define IAVF_RX_RING(rxq) \
+	((rxq)->iavf_rx_32b_ring)
 #endif
 
-typedef void (*iavf_rxd_to_pkt_fields_t)(struct iavf_rx_queue *rxq,
+#define IAVF_RX_RING_PTR(rxq, entry) \
+	(IAVF_RX_RING(rxq) + (entry))
+
+typedef void (*iavf_rxd_to_pkt_fields_t)(struct ci_rx_queue *rxq,
 				struct rte_mbuf *mb,
 				volatile union iavf_rx_flex_desc *rxdp);
 
 struct iavf_rxq_ops {
-	void (*release_mbufs)(struct iavf_rx_queue *rxq);
+	void (*release_mbufs)(struct ci_rx_queue *rxq);
 };
 
 struct iavf_txq_ops {
@@ -221,59 +228,6 @@ struct iavf_rx_queue_stats {
 	struct iavf_ipsec_crypto_stats ipsec_crypto;
 };
 
-/* Structure associated with each Rx queue. */
-struct iavf_rx_queue {
-	struct rte_mempool *mp;       /* mbuf pool to populate Rx ring */
-	const struct rte_memzone *mz; /* memzone for Rx ring */
-	volatile union iavf_rx_desc *rx_ring; /* Rx ring virtual address */
-	uint64_t rx_ring_phys_addr;   /* Rx ring DMA address */
-	struct rte_mbuf **sw_ring;     /* address of SW ring */
-	uint16_t nb_rx_desc;          /* ring length */
-	uint16_t rx_tail;             /* current value of tail */
-	volatile uint8_t *qrx_tail;   /* register address of tail */
-	uint16_t rx_free_thresh;      /* max free RX desc to hold */
-	uint16_t nb_rx_hold;          /* number of held free RX desc */
-	struct rte_mbuf *pkt_first_seg; /* first segment of current packet */
-	struct rte_mbuf *pkt_last_seg;  /* last segment of current packet */
-	struct rte_mbuf fake_mbuf;      /* dummy mbuf */
-	uint8_t rxdid;
-	uint8_t rel_mbufs_type;
-
-	/* used for VPMD */
-	uint16_t rxrearm_nb;       /* number of remaining to be re-armed */
-	uint16_t rxrearm_start;    /* the idx we start the re-arming from */
-	uint64_t mbuf_initializer; /* value to init mbufs */
-
-	/* for rx bulk */
-	uint16_t rx_nb_avail;      /* number of staged packets ready */
-	uint16_t rx_next_avail;    /* index of next staged packets */
-	uint16_t rx_free_trigger;  /* triggers rx buffer allocation */
-	struct rte_mbuf *rx_stage[IAVF_RX_MAX_BURST * 2]; /* store mbuf */
-
-	uint16_t port_id;        /* device port ID */
-	uint8_t crc_len;        /* 0 if CRC stripped, 4 otherwise */
-	uint8_t fdir_enabled;   /* 0 if FDIR disabled, 1 when enabled */
-	uint16_t queue_id;      /* Rx queue index */
-	uint16_t rx_buf_len;    /* The packet buffer size */
-	uint16_t rx_hdr_len;    /* The header buffer size */
-	uint16_t max_pkt_len;   /* Maximum packet length */
-	struct iavf_vsi *vsi; /**< the VSI this queue belongs to */
-
-	bool q_set;             /* if rx queue has been configured */
-	bool rx_deferred_start; /* don't start this queue in dev start */
-	const struct iavf_rxq_ops *ops;
-	uint8_t rx_flags;
-#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG1     BIT(0)
-#define IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2   BIT(1)
-	uint8_t proto_xtr; /* protocol extraction type */
-	uint64_t xtr_ol_flag;
-		/* flexible descriptor metadata extraction offload flag */
-	struct iavf_rx_queue_stats *stats;
-	uint64_t offloads;
-	uint64_t phc_time;
-	uint64_t hw_time_update;
-};
-
 /* Offload features */
 union iavf_tx_offload {
 	uint64_t data;
@@ -691,7 +645,7 @@ uint16_t iavf_xmit_pkts_vec_avx2_offload(void *tx_queue, struct rte_mbuf **tx_pk
 int iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc);
 int iavf_rx_vec_dev_check(struct rte_eth_dev *dev);
 int iavf_tx_vec_dev_check(struct rte_eth_dev *dev);
-int iavf_rxq_vec_setup(struct iavf_rx_queue *rxq);
+int iavf_rxq_vec_setup(struct ci_rx_queue *rxq);
 int iavf_txq_vec_setup(struct ci_tx_queue *txq);
 uint16_t iavf_recv_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				   uint16_t nb_pkts);
@@ -731,12 +685,12 @@ uint8_t iavf_proto_xtr_type_to_rxdid(uint8_t xtr_type);
 
 void iavf_set_default_ptype_table(struct rte_eth_dev *dev);
 void iavf_tx_queue_release_mbufs_avx512(struct ci_tx_queue *txq);
-void iavf_rx_queue_release_mbufs_sse(struct iavf_rx_queue *rxq);
+void iavf_rx_queue_release_mbufs_sse(struct ci_rx_queue *rxq);
 void iavf_tx_queue_release_mbufs_sse(struct ci_tx_queue *txq);
-void iavf_rx_queue_release_mbufs_neon(struct iavf_rx_queue *rxq);
+void iavf_rx_queue_release_mbufs_neon(struct ci_rx_queue *rxq);
 
 static inline
-void iavf_dump_rx_descriptor(struct iavf_rx_queue *rxq,
+void iavf_dump_rx_descriptor(struct ci_rx_queue *rxq,
 			    const volatile void *desc,
 			    uint16_t rx_id)
 {
@@ -794,7 +748,7 @@ void iavf_dump_tx_descriptor(const struct ci_tx_queue *txq,
 #define FDIR_PROC_ENABLE_PER_QUEUE(ad, on) do { \
 	int i; \
 	for (i = 0; i < (ad)->dev_data->nb_rx_queues; i++) { \
-		struct iavf_rx_queue *rxq = (ad)->dev_data->rx_queues[i]; \
+		struct ci_rx_queue *rxq = (ad)->dev_data->rx_queues[i]; \
 		if (!rxq) \
 			continue; \
 		rxq->fdir_enabled = on; \
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
index 88e35dc3e9..f51fa4acf9 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
@@ -7,7 +7,7 @@
 #include <rte_vect.h>
 
 static __rte_always_inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	iavf_rxq_rearm_common(rxq, false);
 }
@@ -15,21 +15,19 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 #define PKTLEN_SHIFT     10
 
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx2(struct ci_rx_queue *rxq,
 			     struct rte_mbuf **rx_pkts,
 			     uint16_t nb_pkts, uint8_t *split_packet,
 			     bool offload)
 {
 #define IAVF_DESCS_PER_LOOP_AVX 8
 
-	/* const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; */
-	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *type_table = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	/* struct iavf_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail]; */
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union iavf_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union iavf_rx_desc *rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 
 	rte_prefetch0(rxdp);
@@ -487,14 +485,14 @@ flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct ci_rx_queue *rxq,
 				      struct rte_mbuf **rx_pkts,
 				      uint16_t nb_pkts, uint8_t *split_packet,
 				      bool offload)
 {
 #define IAVF_DESCS_PER_LOOP_AVX 8
 
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
@@ -503,9 +501,9 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
 	volatile union iavf_rx_flex_desc *rxdp =
-		(volatile union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+		(volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -1476,7 +1474,7 @@ static __rte_always_inline uint16_t
 iavf_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				   uint16_t nb_pkts, bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
@@ -1565,7 +1563,7 @@ iavf_recv_scattered_burst_vec_avx2_flex_rxd(void *rx_queue,
 					    struct rte_mbuf **rx_pkts,
 					    uint16_t nb_pkts, bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
index f2af028bef..80495f33cd 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
@@ -28,26 +28,26 @@
 #define IAVF_RX_TS_OFFLOAD
 
 static __rte_always_inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	iavf_rxq_rearm_common(rxq, true);
 }
 
 #define IAVF_RX_LEN_MASK 0x80808080
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx512(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx512(struct ci_rx_queue *rxq,
 			       struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts, uint8_t *split_packet,
 			       bool offload)
 {
 #ifdef IAVF_RX_PTYPE_OFFLOAD
-	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *type_table = rxq->iavf_vsi->adapter->ptype_tbl;
 #endif
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0, 0,
 						    rxq->mbuf_initializer);
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
-	volatile union iavf_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union iavf_rx_desc *rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -578,13 +578,13 @@ flex_rxd_to_fdir_flags_vec_avx512(const __m256i fdir_id0_7)
 }
 
 static __rte_always_inline uint16_t
-_iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
+_iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct ci_rx_queue *rxq,
 					struct rte_mbuf **rx_pkts,
 					uint16_t nb_pkts,
 					uint8_t *split_packet,
 					bool offload)
 {
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
 #endif
@@ -594,9 +594,9 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0, 0,
 						    rxq->mbuf_initializer);
-	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	struct ci_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail];
 	volatile union iavf_rx_flex_desc *rxdp =
-		(volatile union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+		(volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -1653,7 +1653,7 @@ static __rte_always_inline uint16_t
 iavf_recv_scattered_burst_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts, bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
@@ -1729,7 +1729,7 @@ iavf_recv_scattered_burst_vec_avx512_flex_rxd(void *rx_queue,
 					      uint16_t nb_pkts,
 					      bool offload)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 
 	/* get some new buffers */
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
index 38e9a206d9..f0a7d19b6a 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
@@ -8,7 +8,6 @@
 #include <ethdev_driver.h>
 #include <rte_malloc.h>
 
-#include "../common/rx.h"
 #include "iavf.h"
 #include "iavf_rxtx.h"
 
@@ -21,7 +20,7 @@ iavf_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
 }
 
 static inline void
-_iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
+_iavf_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq)
 {
 	const unsigned int mask = rxq->nb_rx_desc - 1;
 	unsigned int i;
@@ -32,15 +31,15 @@ _iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
 	/* free all mbufs that are valid in the ring */
 	if (rxq->rxrearm_nb == 0) {
 		for (i = 0; i < rxq->nb_rx_desc; i++) {
-			if (rxq->sw_ring[i])
-				rte_pktmbuf_free_seg(rxq->sw_ring[i]);
+			if (rxq->sw_ring[i].mbuf)
+				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
 		}
 	} else {
 		for (i = rxq->rx_tail;
 		     i != rxq->rxrearm_start;
 		     i = (i + 1) & mask) {
-			if (rxq->sw_ring[i])
-				rte_pktmbuf_free_seg(rxq->sw_ring[i]);
+			if (rxq->sw_ring[i].mbuf)
+				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
 		}
 	}
 
@@ -51,7 +50,7 @@ _iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
 }
 
 static inline int
-iavf_rx_vec_queue_default(struct iavf_rx_queue *rxq)
+iavf_rx_vec_queue_default(struct ci_rx_queue *rxq)
 {
 	if (!rxq)
 		return -1;
@@ -117,7 +116,7 @@ static inline int
 iavf_rx_vec_dev_check_default(struct rte_eth_dev *dev)
 {
 	int i;
-	struct iavf_rx_queue *rxq;
+	struct ci_rx_queue *rxq;
 	int ret;
 	int result = 0;
 
@@ -240,14 +239,14 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 
 #ifdef RTE_ARCH_X86
 static __rte_always_inline void
-iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
+iavf_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -259,7 +258,7 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 
 			dma_addr0 = _mm_setzero_si128();
 			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i] = &rxq->fake_mbuf;
+				rxp[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
@@ -278,8 +277,8 @@ iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
 	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
 		__m128i vaddr0, vaddr1;
 
-		mb0 = rxp[0];
-		mb1 = rxp[1];
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
 
 		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
 		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
index a583340f15..e1c8f3c7f9 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
@@ -15,7 +15,7 @@
 #include "iavf_rxtx_vec_common.h"
 
 static inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
@@ -75,7 +75,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-desc_to_olflags_v(struct iavf_rx_queue *rxq, volatile union iavf_rx_desc *rxdp,
+desc_to_olflags_v(struct ci_rx_queue *rxq, volatile union iavf_rx_desc *rxdp,
 		  uint64x2_t descs[4], struct rte_mbuf **rx_pkts)
 {
 	RTE_SET_USED(rxdp);
@@ -193,7 +193,7 @@ desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **__rte_restrict rx_pkts,
  * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct iavf_rx_queue *__rte_restrict rxq,
+_recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
 		   struct rte_mbuf **__rte_restrict rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
@@ -203,7 +203,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *__rte_restrict rxq,
 	struct rte_mbuf **sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
-	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	/* mask to shuffle from desc. to mbuf */
 	uint8x16_t shuf_msk = {
@@ -394,13 +394,13 @@ iavf_recv_pkts_vec(void *__rte_restrict rx_queue,
 }
 
 void __rte_cold
-iavf_rx_queue_release_mbufs_neon(struct iavf_rx_queue *rxq)
+iavf_rx_queue_release_mbufs_neon(struct ci_rx_queue *rxq)
 {
 	_iavf_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __rte_cold
-iavf_rxq_vec_setup(struct iavf_rx_queue *rxq)
+iavf_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->rel_mbufs_type = IAVF_REL_MBUFS_NEON_VEC;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
index 2e41079e88..f18dfd636c 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
@@ -13,19 +13,19 @@
 #include <rte_vect.h>
 
 static inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
 	__m128i dma_addr0, dma_addr1;
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp, (void *)rxp,
@@ -33,7 +33,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 		if (rxq->rxrearm_nb + rxq->rx_free_thresh >= rxq->nb_rx_desc) {
 			dma_addr0 = _mm_setzero_si128();
 			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i] = &rxq->fake_mbuf;
+				rxp[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
 						dma_addr0);
 			}
@@ -47,8 +47,8 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 	for (i = 0; i < rxq->rx_free_thresh; i += 2, rxp += 2) {
 		__m128i vaddr0, vaddr1;
 
-		mb0 = rxp[0];
-		mb1 = rxp[1];
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
 
 		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
 		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
@@ -88,7 +88,7 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 }
 
 static inline void
-desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4],
 		  struct rte_mbuf **rx_pkts)
 {
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -206,11 +206,11 @@ flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 static inline void
-flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4], __m128i descs_bh[4],
+flex_desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4], __m128i descs_bh[4],
 		       struct rte_mbuf **rx_pkts)
 #else
 static inline void
-flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+flex_desc_to_olflags_v(struct ci_rx_queue *rxq, __m128i descs[4],
 		       struct rte_mbuf **rx_pkts)
 #endif
 {
@@ -466,16 +466,16 @@ flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		   uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
 	__m128i shuf_msk;
-	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	const uint32_t *ptype_tbl = rxq->iavf_vsi->adapter->ptype_tbl;
 
 	__m128i crc_adjust = _mm_set_epi16(
 				0, 0, 0,    /* ignore non-length fields */
@@ -500,7 +500,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = rxq->rx_ring + rxq->rx_tail;
+	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -571,7 +571,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 #endif
 
 		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
-		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos].mbuf);
 		/* Read desc statuses backwards to avoid race condition */
 		/* A.1 load desc[3] */
 		descs[3] = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, rxdp + 3));
@@ -714,16 +714,16 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
  * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
-_recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
+_recv_raw_pkts_vec_flex_rxd(struct ci_rx_queue *rxq,
 			    struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union iavf_rx_flex_desc *rxdp;
-	struct rte_mbuf **sw_ring;
+	struct ci_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 	uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;
 #endif
@@ -779,7 +779,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	/* Just the act of getting into the function from the application is
 	 * going to cost about 7 cycles
 	 */
-	rxdp = (volatile union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+	rxdp = (volatile union iavf_rx_flex_desc *)IAVF_RX_RING_PTR(rxq, rxq->rx_tail);
 
 	rte_prefetch0(rxdp);
 
@@ -857,7 +857,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 #endif
 
 		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
-		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos].mbuf);
 		/* Read desc statuses backwards to avoid race condition */
 		/* A.1 load desc[3] */
 		descs[3] = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, rxdp + 3));
@@ -1207,7 +1207,7 @@ static uint16_t
 iavf_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			      uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 	unsigned int i = 0;
 
@@ -1276,7 +1276,7 @@ iavf_recv_scattered_burst_vec_flex_rxd(void *rx_queue,
 				       struct rte_mbuf **rx_pkts,
 				       uint16_t nb_pkts)
 {
-	struct iavf_rx_queue *rxq = rx_queue;
+	struct ci_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
 	unsigned int i = 0;
 
@@ -1449,7 +1449,7 @@ iavf_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 void __rte_cold
-iavf_rx_queue_release_mbufs_sse(struct iavf_rx_queue *rxq)
+iavf_rx_queue_release_mbufs_sse(struct ci_rx_queue *rxq)
 {
 	_iavf_rx_queue_release_mbufs_vec(rxq);
 }
@@ -1462,7 +1462,7 @@ iavf_txq_vec_setup(struct ci_tx_queue *txq)
 }
 
 int __rte_cold
-iavf_rxq_vec_setup(struct iavf_rx_queue *rxq)
+iavf_rxq_vec_setup(struct ci_rx_queue *rxq)
 {
 	rxq->rel_mbufs_type = IAVF_REL_MBUFS_SSE_VEC;
 	rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
diff --git a/drivers/net/intel/iavf/iavf_vchnl.c b/drivers/net/intel/iavf/iavf_vchnl.c
index 6feca8435e..9f8bb07726 100644
--- a/drivers/net/intel/iavf/iavf_vchnl.c
+++ b/drivers/net/intel/iavf/iavf_vchnl.c
@@ -1218,7 +1218,7 @@ int
 iavf_configure_queues(struct iavf_adapter *adapter,
 		uint16_t num_queue_pairs, uint16_t index)
 {
-	struct iavf_rx_queue **rxq = (struct iavf_rx_queue **)adapter->dev_data->rx_queues;
+	struct ci_rx_queue **rxq = (struct ci_rx_queue **)adapter->dev_data->rx_queues;
 	struct ci_tx_queue **txq = (struct ci_tx_queue **)adapter->dev_data->tx_queues;
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
 	struct virtchnl_vsi_queue_config_info *vc_config;
@@ -2258,9 +2258,9 @@ iavf_get_ptp_cap(struct iavf_adapter *adapter)
 }
 
 int
-iavf_get_phc_time(struct iavf_rx_queue *rxq)
+iavf_get_phc_time(struct ci_rx_queue *rxq)
 {
-	struct iavf_adapter *adapter = rxq->vsi->adapter;
+	struct iavf_adapter *adapter = rxq->iavf_vsi->adapter;
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
 	struct virtchnl_phc_time phc_time;
 	struct iavf_cmd_info args;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v1 07/13] net/intel: generalize vectorized Rx rearm
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (4 preceding siblings ...)
  2025-05-06 13:27 ` [PATCH v1 06/13] net/iavf: " Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
                   ` (5 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Bruce Richardson

There is a certain amount of duplication between various drivers when it
comes to Rx ring rearm. This patch takes the implementation from the ice
driver as a base, because it has support for no IOVA in mbuf as well as
all vector implementations, and moves it to a common file.

The driver Rx rearm code used copious amounts of #ifdef-ery to
discriminate between 16- and 32-byte descriptor support, but we cannot do
that in the common code because we will not have access to those
definitions. So, instead, we rely on compile-time constant propagation and
force-inlining to ensure that the compiler generates effectively the same
code it generated back when the implementation lived in each driver. We
also add a compile-time definition of vectorization levels for x86 vector
instructions, to discriminate between the different instruction sets. This
too is constant-propagated, and thus should not affect performance.
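
For illustration, each per-driver rearm function then reduces to a thin
wrapper around the common helper, with both arguments being compile-time
constants (this mirrors the ice AVX2 change in the diff below):

    static __rte_always_inline void
    ice_rxq_rearm(struct ci_rx_queue *rxq)
    {
            /* constant descriptor size and vector level pick the code path */
            ci_rxq_rearm(rxq, sizeof(union ice_rx_flex_desc), CI_RX_VEC_LEVEL_AVX2);
    }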

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx.h               |   3 +
 drivers/net/intel/common/rx_vec_sse.h       | 323 ++++++++++++++++++++
 drivers/net/intel/ice/ice_rxtx.h            |   2 +-
 drivers/net/intel/ice/ice_rxtx_common_avx.h | 233 --------------
 drivers/net/intel/ice/ice_rxtx_vec_avx2.c   |   5 +-
 drivers/net/intel/ice/ice_rxtx_vec_avx512.c |   5 +-
 drivers/net/intel/ice/ice_rxtx_vec_sse.c    |  77 +----
 7 files changed, 336 insertions(+), 312 deletions(-)
 create mode 100644 drivers/net/intel/common/rx_vec_sse.h
 delete mode 100644 drivers/net/intel/ice/ice_rxtx_common_avx.h

diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h
index 507235f4c6..b084224e34 100644
--- a/drivers/net/intel/common/rx.h
+++ b/drivers/net/intel/common/rx.h
@@ -13,6 +13,8 @@
 #define CI_RX_BURST 32
 #define CI_RX_MAX_BURST 32
 #define CI_RX_MAX_NSEG 2
+#define CI_VPMD_DESCS_PER_LOOP 4
+#define CI_VPMD_RX_REARM_THRESH 64
 
 struct ci_rx_queue;
 
@@ -39,6 +41,7 @@ struct ci_rx_queue {
 		volatile union ice_32b_rx_flex_desc *ice_rx_32b_ring;
 		volatile union iavf_16byte_rx_desc *iavf_rx_16b_ring;
 		volatile union iavf_32byte_rx_desc *iavf_rx_32b_ring;
+		volatile void *rx_ring; /**< Generic */
 	};
 	volatile uint8_t *qrx_tail;   /**< register address of tail */
 	struct ci_rx_entry *sw_ring; /**< address of RX software ring. */
diff --git a/drivers/net/intel/common/rx_vec_sse.h b/drivers/net/intel/common/rx_vec_sse.h
new file mode 100644
index 0000000000..6fe0baf38b
--- /dev/null
+++ b/drivers/net/intel/common/rx_vec_sse.h
@@ -0,0 +1,323 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 Intel Corporation
+ */
+
+#ifndef _COMMON_INTEL_RX_VEC_SSE_H_
+#define _COMMON_INTEL_RX_VEC_SSE_H_
+
+#include <stdint.h>
+
+#include <ethdev_driver.h>
+#include <rte_io.h>
+
+#include "rx.h"
+
+enum ci_rx_vec_level {
+	CI_RX_VEC_LEVEL_SSE = 0,
+	CI_RX_VEC_LEVEL_AVX2,
+	CI_RX_VEC_LEVEL_AVX512,
+};
+
+static inline int
+_ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	volatile void *rxdp;
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void **)rxp,
+				 rearm_thresh) < 0) {
+		if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) {
+				rxp[i].mbuf = &rxq->fake_mbuf;
+				const void *ptr = RTE_PTR_ADD(rxdp, i * desc_len);
+				_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr),
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += rearm_thresh;
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * SSE code path can handle both 16-byte and 32-byte descriptors with one code
+ * path, as we only ever write 16 bytes at a time.
+ */
+static __rte_always_inline void
+_ci_rxq_rearm_sse(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	const __m128i hdr_room = _mm_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+	const __m128i zero = _mm_setzero_si128();
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile void *rxdp;
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp = RTE_PTR_ADD(rxdp, 2 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len);
+		__m128i vaddr0, vaddr1;
+		__m128i dma_addr0, dma_addr1;
+		struct rte_mbuf *mb0, *mb1;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* add headroom to address values */
+		vaddr0 = _mm_add_epi64(vaddr0, hdr_room);
+		vaddr1 = _mm_add_epi64(vaddr1, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+		/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, zero);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, zero);
+#else
+		/* erase Header Buffer Address */
+		dma_addr0 = _mm_unpacklo_epi64(vaddr0, zero);
+		dma_addr1 = _mm_unpacklo_epi64(vaddr1, zero);
+#endif
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr0), dma_addr0);
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr1), dma_addr1);
+	}
+}
+
+#ifdef __AVX2__
+/* AVX2 version for 16-byte descriptors, handles 4 buffers at a time */
+static __rte_always_inline void
+_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	const size_t desc_len = 16;
+	volatile void *rxdp;
+	const __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
+	const __m256i zero = _mm256_setzero_si256();
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 4, rxp += 4, rxdp = RTE_PTR_ADD(rxdp, 4 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 2);
+		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+		__m256i vaddr0_1, vaddr2_3;
+		__m256i dma_addr0_1, dma_addr2_3;
+		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+		mb2 = rxp[2].mbuf;
+		mb3 = rxp[3].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+
+		/**
+		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+		 * into the high lanes. Similarly for 2 & 3
+		 */
+		vaddr0_1 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+						vaddr1, 1);
+		vaddr2_3 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+						vaddr3, 1);
+
+		/* add headroom to address values */
+		vaddr0_1 = _mm256_add_epi64(vaddr0_1, hdr_room);
+		vaddr2_3 = _mm256_add_epi64(vaddr2_3, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, zero);
+		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, zero);
+#else
+		/* erase Header Buffer Address */
+		dma_addr0_1 = _mm256_unpacklo_epi64(vaddr0_1, zero);
+		dma_addr2_3 = _mm256_unpacklo_epi64(vaddr2_3, zero);
+#endif
+
+		/* flush desc with pa dma_addr */
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr0), dma_addr0_1);
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr1), dma_addr2_3);
+	}
+}
+#endif /* __AVX2__ */
+
+#ifdef __AVX512VL__
+/* AVX512 version for 16-byte descriptors, handles 8 buffers at a time */
+static __rte_always_inline void
+_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	const size_t desc_len = 16;
+	volatile void *rxdp;
+	int i;
+	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+	struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+	__m512i dma_addr0_3, dma_addr4_7;
+	__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+	__m512i zero = _mm512_setzero_si512();
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 8 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 8, rxp += 8, rxdp = RTE_PTR_ADD(rxdp, 8 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 4);
+		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+		__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+		__m256i vaddr0_1, vaddr2_3;
+		__m256i vaddr4_5, vaddr6_7;
+		__m512i vaddr0_3, vaddr4_7;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+		mb2 = rxp[2].mbuf;
+		mb3 = rxp[3].mbuf;
+		mb4 = rxp[4].mbuf;
+		mb5 = rxp[5].mbuf;
+		mb6 = rxp[6].mbuf;
+		mb7 = rxp[7].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+#endif
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+		vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+		vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+		vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+		vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+		/**
+		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+		 * into the high lanes. Similarly for 2 & 3, and so on.
+		 */
+		vaddr0_1 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+						vaddr1, 1);
+		vaddr2_3 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+						vaddr3, 1);
+		vaddr4_5 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+						vaddr5, 1);
+		vaddr6_7 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+						vaddr7, 1);
+		vaddr0_3 =
+			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+						vaddr2_3, 1);
+		vaddr4_7 =
+			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+						vaddr6_7, 1);
+
+		/* add headroom to address values */
+		vaddr0_3 = _mm512_add_epi64(vaddr0_3, hdr_room);
+		vaddr4_7 = _mm512_add_epi64(vaddr4_7, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+		dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, zero);
+		dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, zero);
+#else
+		/* erase Header Buffer Address */
+		dma_addr0_3 = _mm512_unpacklo_epi64(vaddr0_3, zero);
+		dma_addr4_7 = _mm512_unpacklo_epi64(vaddr4_7, zero);
+#endif
+
+		/* flush desc with pa dma_addr */
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr0), dma_addr0_3);
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr1), dma_addr4_7);
+	}
+}
+#endif /* __AVX512VL__ */
+
+static __rte_always_inline void
+ci_rxq_rearm(struct ci_rx_queue *rxq, const size_t desc_len,
+		const enum ci_rx_vec_level vec_level)
+{
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	uint16_t rx_id;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (_ci_rxq_rearm_get_bufs(rxq, desc_len) < 0)
+		return;
+
+	if (desc_len == 16) {
+		switch (vec_level) {
+		case CI_RX_VEC_LEVEL_AVX512:
+#ifdef __AVX512VL__
+			_ci_rxq_rearm_avx512(rxq);
+			break;
+#else
+			/* AVX512 not enabled at compile time, fall back to AVX2 */
+			/* fall through */
+#endif
+		case CI_RX_VEC_LEVEL_AVX2:
+#ifdef __AVX2__
+			_ci_rxq_rearm_avx2(rxq);
+			break;
+#else
+			/* fall back to SSE if AVX2 isn't supported */
+			/* fall through */
+#endif
+		case CI_RX_VEC_LEVEL_SSE:
+			_ci_rxq_rearm_sse(rxq, desc_len);
+			break;
+		}
+	} else {
+		/* for 32-byte descriptors only support SSE */
+		_ci_rxq_rearm_sse(rxq, desc_len);
+	}
+
+	rxq->rxrearm_start += rearm_thresh;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= rearm_thresh;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif /* _COMMON_INTEL_RX_VEC_SSE_H_ */
diff --git a/drivers/net/intel/ice/ice_rxtx.h b/drivers/net/intel/ice/ice_rxtx.h
index 1a39770d7d..72d0972587 100644
--- a/drivers/net/intel/ice/ice_rxtx.h
+++ b/drivers/net/intel/ice/ice_rxtx.h
@@ -43,7 +43,7 @@
 
 #define ICE_VPMD_RX_BURST           32
 #define ICE_VPMD_TX_BURST           32
-#define ICE_RXQ_REARM_THRESH        64
+#define ICE_RXQ_REARM_THRESH        CI_VPMD_RX_REARM_THRESH
 #define ICE_MAX_RX_BURST            ICE_RXQ_REARM_THRESH
 #define ICE_TX_MAX_FREE_BUF_SZ      64
 #define ICE_DESCS_PER_LOOP          4
diff --git a/drivers/net/intel/ice/ice_rxtx_common_avx.h b/drivers/net/intel/ice/ice_rxtx_common_avx.h
deleted file mode 100644
index 7209c902db..0000000000
--- a/drivers/net/intel/ice/ice_rxtx_common_avx.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2019 Intel Corporation
- */
-
-#ifndef _ICE_RXTX_COMMON_AVX_H_
-#define _ICE_RXTX_COMMON_AVX_H_
-
-#include "ice_rxtx.h"
-
-#ifdef __AVX2__
-static __rte_always_inline void
-ice_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 ICE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-#if RTE_IOVA_IN_MBUF
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-#else
-		/* convert va to dma_addr hdr/data */
-		dma_addr0 = _mm_unpacklo_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpacklo_epi64(vaddr1, vaddr1);
-#endif
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-#else
-#ifdef __AVX512VL__
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < ICE_RXQ_REARM_THRESH;
-				i += 8, rxep += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-			mb4 = rxep[4].mbuf;
-			mb5 = rxep[5].mbuf;
-			mb6 = rxep[6].mbuf;
-			mb7 = rxep[7].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-						   vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-						   vaddr6_7, 1);
-
-#if RTE_IOVA_IN_MBUF
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-#else
-			/* convert va to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpacklo_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpacklo_epi64(vaddr4_7, vaddr4_7);
-#endif
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *, &rxdp->read), dma_addr0_3);
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *, &(rxdp + 4)->read), dma_addr4_7);
-		}
-	} else
-#endif /* __AVX512VL__ */
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < ICE_RXQ_REARM_THRESH;
-				i += 4, rxep += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-
-#if RTE_IOVA_IN_MBUF
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-#else
-			/* convert va to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpacklo_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpacklo_epi64(vaddr2_3, vaddr2_3);
-#endif
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *, &rxdp->read), dma_addr0_1);
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *, &(rxdp + 2)->read), dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif /* __AVX2__ */
-
-#endif /* _ICE_RXTX_COMMON_AVX_H_ */
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
index f4555369a2..5ca3f92482 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
@@ -3,14 +3,15 @@
  */
 
 #include "ice_rxtx_vec_common.h"
-#include "ice_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
 static __rte_always_inline void
 ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	ice_rxq_rearm_common(rxq, false);
+	ci_rxq_rearm(rxq, sizeof(union ice_rx_flex_desc), CI_RX_VEC_LEVEL_AVX2);
 }
 
 static __rte_always_inline __m256i
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
index 6eea74d703..883ea97c07 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
@@ -3,7 +3,8 @@
  */
 
 #include "ice_rxtx_vec_common.h"
-#include "ice_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
@@ -12,7 +13,7 @@
 static __rte_always_inline void
 ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	ice_rxq_rearm_common(rxq, true);
+	ci_rxq_rearm(rxq, sizeof(union ice_rx_flex_desc), CI_RX_VEC_LEVEL_AVX512);
 }
 
 static inline __m256i
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_sse.c b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
index dc9d37226a..fa0c7e8829 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
@@ -4,6 +4,8 @@
 
 #include "ice_rxtx_vec_common.h"
 
+#include "../common/rx_vec_sse.h"
+
 #include <rte_vect.h>
 
 static inline __m128i
@@ -28,80 +30,7 @@ ice_flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
 static inline void
 ice_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ice_rx_flex_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-					  RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	rxdp = ICE_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 ICE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			ICE_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				 offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-#if RTE_IOVA_IN_MBUF
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-#else
-		/* convert va to dma_addr hdr/data */
-		dma_addr0 = _mm_unpacklo_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpacklo_epi64(vaddr1, vaddr1);
-#endif
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union ice_rx_flex_desc), CI_RX_VEC_LEVEL_SSE);
 }
 
 static inline void
-- 
2.47.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v1 08/13] net/i40e: use common Rx rearm code
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (5 preceding siblings ...)
  2025-05-06 13:27 ` [PATCH v1 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 09/13] net/iavf: " Anatoly Burakov
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes

The i40e driver has an implementation of vectorized mbuf rearm code that
is identical to the one in the common code, so just use that.

In addition, the i40e driver has an implementation of Rx queue rearm for
the Neon instruction set, so create a common header for Neon
implementations too, and use it in the i40e Neon code.
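
For illustration, the i40e Neon rearm then becomes a thin wrapper around
the common helper (a sketch based on the new header below; the Neon
variant only takes the descriptor size, as it has a single code path):

    static inline void
    i40e_rxq_rearm(struct ci_rx_queue *rxq)
    {
            ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc));
    }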

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx_vec_neon.h        | 131 +++++++++++
 drivers/net/intel/i40e/i40e_rxtx.h            |   2 +-
 drivers/net/intel/i40e/i40e_rxtx_common_avx.h | 215 ------------------
 drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c   |   5 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c |   5 +-
 drivers/net/intel/i40e/i40e_rxtx_vec_neon.c   |  59 +----
 drivers/net/intel/i40e/i40e_rxtx_vec_sse.c    |  70 +-----
 7 files changed, 144 insertions(+), 343 deletions(-)
 create mode 100644 drivers/net/intel/common/rx_vec_neon.h
 delete mode 100644 drivers/net/intel/i40e/i40e_rxtx_common_avx.h

diff --git a/drivers/net/intel/common/rx_vec_neon.h b/drivers/net/intel/common/rx_vec_neon.h
new file mode 100644
index 0000000000..35379ab563
--- /dev/null
+++ b/drivers/net/intel/common/rx_vec_neon.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 Intel Corporation
+ */
+
+#ifndef _COMMON_INTEL_RX_VEC_NEON_H_
+#define _COMMON_INTEL_RX_VEC_NEON_H_
+
+#include <stdint.h>
+
+#include <ethdev_driver.h>
+#include <rte_io.h>
+#include <rte_vect.h>
+
+#include "rx.h"
+
+static inline int
+_ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	volatile void *rxdp;
+	int i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void **)rxp,
+				 rearm_thresh) < 0) {
+		if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) {
+			uint64x2_t zero = vdupq_n_u64(0);
+
+			for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) {
+				rxp[i].mbuf = &rxq->fake_mbuf;
+				const void *ptr = RTE_PTR_ADD(rxdp, i * desc_len);
+				vst1q_u64(RTE_CAST_PTR(uint64_t *, ptr), zero);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += rearm_thresh;
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * The Neon code path can handle both 16-byte and 32-byte descriptors with one
+ * code path, as we only ever write 16 bytes at a time.
+ */
+static __rte_always_inline void
+_ci_rxq_rearm_neon(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	const uint64x2_t zero = vdupq_n_u64(0);
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	volatile void *rxdp;
+	int i;
+
+	const uint8x8_t mbuf_init = vld1_u8((uint8_t *)&rxq->mbuf_initializer);
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp = RTE_PTR_ADD(rxdp, 2 * desc_len)) {
+		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len);
+		uint64_t addr0, addr1;
+		uint64x2_t dma_addr0, dma_addr1;
+		struct rte_mbuf *mb0, *mb1;
+
+		mb0 = rxp[0].mbuf;
+		mb1 = rxp[1].mbuf;
+
+#if RTE_IOVA_IN_MBUF
+		/*
+		 * Flush mbuf with pkt template.
+		 * Data to be rearmed is 6 bytes long.
+		 */
+		vst1_u8((uint8_t *)&mb0->rearm_data, mbuf_init);
+		addr0 = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
+		dma_addr0 = vsetq_lane_u64(addr0, zero, 0);
+		/* flush desc with pa dma_addr */
+		vst1q_u64(RTE_CAST_PTR(volatile uint64_t *, ptr0), dma_addr0);
+
+		vst1_u8((uint8_t *)&mb1->rearm_data, mbuf_init);
+		addr1 = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
+		dma_addr1 = vsetq_lane_u64(addr1, zero, 0);
+		vst1q_u64(RTE_CAST_PTR(volatile uint64_t *, ptr1), dma_addr1);
+#else
+		/*
+		 * Flush mbuf with pkt template.
+		 * Data to be rearmed is 6 bytes long.
+		 */
+		vst1_u8((uint8_t *)&mb0->rearm_data, mbuf_init);
+		addr0 = (uintptr_t)RTE_PTR_ADD(mb0->buf_addr, RTE_PKTMBUF_HEADROOM);
+		dma_addr0 = vsetq_lane_u64(addr0, zero, 0);
+		/* flush desc with pa dma_addr */
+		vst1q_u64(RTE_CAST_PTR(volatile uint64_t *, ptr0), dma_addr0);
+
+		vst1_u8((uint8_t *)&mb1->rearm_data, mbuf_init);
+		addr1 = (uintptr_t)RTE_PTR_ADD(mb1->buf_addr, RTE_PKTMBUF_HEADROOM);
+		dma_addr1 = vsetq_lane_u64(addr1, zero, 0);
+		vst1q_u64(RTE_CAST_PTR(volatile uint64_t *, ptr1), dma_addr1);
+#endif
+	}
+}
+
+static __rte_always_inline void
+ci_rxq_rearm(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	uint16_t rx_id;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (_ci_rxq_rearm_get_bufs(rxq, desc_len) < 0)
+		return;
+
+	_ci_rxq_rearm_neon(rxq, desc_len);
+
+	rxq->rxrearm_start += rearm_thresh;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= rearm_thresh;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif /* _COMMON_INTEL_RX_VEC_NEON_H_ */
diff --git a/drivers/net/intel/i40e/i40e_rxtx.h b/drivers/net/intel/i40e/i40e_rxtx.h
index 4b5a84d8ef..8a41db2df3 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx.h
@@ -13,7 +13,7 @@
 
 #define RTE_I40E_VPMD_RX_BURST        32
 #define RTE_I40E_VPMD_TX_BURST        32
-#define RTE_I40E_RXQ_REARM_THRESH      32
+#define RTE_I40E_RXQ_REARM_THRESH      CI_VPMD_RX_REARM_THRESH
 #define RTE_I40E_MAX_RX_BURST          RTE_I40E_RXQ_REARM_THRESH
 #define RTE_I40E_TX_MAX_FREE_BUF_SZ    64
 #define RTE_I40E_DESCS_PER_LOOP    4
diff --git a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h b/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
deleted file mode 100644
index fd9447014b..0000000000
--- a/drivers/net/intel/i40e/i40e_rxtx_common_avx.h
+++ /dev/null
@@ -1,215 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2015 Intel Corporation
- */
-
-#ifndef _I40E_RXTX_COMMON_AVX_H_
-#define _I40E_RXTX_COMMON_AVX_H_
-#include <stdint.h>
-#include <ethdev_driver.h>
-#include <rte_malloc.h>
-
-#include "i40e_ethdev.h"
-#include "i40e_rxtx.h"
-
-#ifdef __AVX2__
-static __rte_always_inline void
-i40e_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-#else
-#ifdef __AVX512VL__
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
-				i += 8, rxep += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-			mb4 = rxep[4].mbuf;
-			mb5 = rxep[5].mbuf;
-			mb6 = rxep[6].mbuf;
-			mb7 = rxep[7].mbuf;
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-						   vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-						   vaddr6_7, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *,
-					&rxdp->read), dma_addr0_3);
-			_mm512_store_si512(RTE_CAST_PTR(__m512i *,
-					&(rxdp + 4)->read), dma_addr4_7);
-		}
-	} else
-#endif /* __AVX512VL__*/
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
-				i += 4, rxep += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxep[0].mbuf;
-			mb1 = rxep[1].mbuf;
-			mb2 = rxep[2].mbuf;
-			mb3 = rxep[3].mbuf;
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 = _mm256_inserti128_si256
-				(_mm256_castsi128_si256(vaddr0), vaddr1, 1);
-			vaddr2_3 = _mm256_inserti128_si256
-				(_mm256_castsi128_si256(vaddr2), vaddr3, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *,
-					&rxdp->read), dma_addr0_1);
-			_mm256_store_si256(RTE_CAST_PTR(__m256i *,
-					&(rxdp + 2)->read), dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif /* __AVX2__*/
-
-#endif /*_I40E_RXTX_COMMON_AVX_H_*/
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index 0f3f7430aa..260b7d700a 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -11,14 +11,15 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
-#include "i40e_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
 static __rte_always_inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	i40e_rxq_rearm_common(rxq, false);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc), CI_RX_VEC_LEVEL_AVX2);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index f2292b45e8..be004e9f4f 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -11,7 +11,8 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
-#include "i40e_rxtx_common_avx.h"
+
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
@@ -20,7 +21,7 @@
 static __rte_always_inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	i40e_rxq_rearm_common(rxq, true);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc), CI_RX_VEC_LEVEL_AVX512);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index 814aa666dc..6c21546471 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -16,65 +16,12 @@
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
 
+#include "../common/rx_vec_neon.h"
+
 static inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	uint64x2_t dma_addr0, dma_addr1;
-	uint64x2_t zero = vdupq_n_u64(0);
-	uint64_t paddr;
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  RTE_I40E_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read), zero);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr0 = vdupq_n_u64(paddr);
-
-		/* flush desc with pa dma_addr */
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr0);
-
-		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr1 = vdupq_n_u64(paddr);
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
-	rte_io_wmb();
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc));
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 74cd59e245..432177d499 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -12,78 +12,14 @@
 #include "i40e_rxtx.h"
 #include "i40e_rxtx_vec_common.h"
 
+#include "../common/rx_vec_sse.h"
+
 #include <rte_vect.h>
 
 static inline void
 i40e_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union i40e_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
-
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union i40e_rx_desc), CI_RX_VEC_LEVEL_SSE);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
-- 
2.47.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v1 09/13] net/iavf: use common Rx rearm code
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (6 preceding siblings ...)
  2025-05-06 13:27 ` [PATCH v1 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:27 ` [PATCH v1 10/13] net/ixgbe: " Anatoly Burakov
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin, Ian Stokes; +Cc: bruce.richardson

The iavf driver has implementations of vectorized mbuf rearm code that
are identical to the ones in the common code, so just use those.
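
To illustrate the effect, each per-driver rearm hook collapses into a thin
wrapper around the common helper. A minimal sketch based on the NEON hunk
below (the x86 variants pass an extra argument selecting the SIMD path):

static inline void
iavf_rxq_rearm(struct ci_rx_queue *rxq)
{
	/* mempool refill, descriptor writes and the tail update all happen
	 * inside the common helper; only the descriptor size is
	 * driver-specific.
	 */
	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc));
}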

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/iavf/iavf_rxtx.h            |   4 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c   |   3 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c |   3 +-
 drivers/net/intel/iavf/iavf_rxtx_vec_common.h | 199 ------------------
 drivers/net/intel/iavf/iavf_rxtx_vec_neon.c   |  58 +----
 drivers/net/intel/iavf/iavf_rxtx_vec_sse.c    |  72 +------
 6 files changed, 11 insertions(+), 328 deletions(-)

diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index c43ddc3c2f..d70250bf85 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -25,8 +25,8 @@
 /* used for Vector PMD */
 #define IAVF_VPMD_RX_MAX_BURST    32
 #define IAVF_VPMD_TX_MAX_BURST    32
-#define IAVF_RXQ_REARM_THRESH     32
-#define IAVF_VPMD_DESCS_PER_LOOP  4
+#define IAVF_RXQ_REARM_THRESH     CI_VPMD_RX_REARM_THRESH
+#define IAVF_VPMD_DESCS_PER_LOOP  CI_VPMD_DESCS_PER_LOOP
 #define IAVF_VPMD_TX_MAX_FREE_BUF 64
 
 #define IAVF_TX_NO_VECTOR_FLAGS (				 \
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
index f51fa4acf9..496c7abc42 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
@@ -2,6 +2,7 @@
  * Copyright(c) 2019 Intel Corporation
  */
 
+#include "../common/rx_vec_sse.h"
 #include "iavf_rxtx_vec_common.h"
 
 #include <rte_vect.h>
@@ -9,7 +10,7 @@
 static __rte_always_inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	iavf_rxq_rearm_common(rxq, false);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc), false);
 }
 
 #define PKTLEN_SHIFT     10
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
index 80495f33cd..e7cd2b7c89 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
@@ -2,6 +2,7 @@
  * Copyright(c) 2020 Intel Corporation
  */
 
+#include "../common/rx_vec_sse.h"
 #include "iavf_rxtx_vec_common.h"
 
 #include <rte_vect.h>
@@ -30,7 +31,7 @@
 static __rte_always_inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	iavf_rxq_rearm_common(rxq, true);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc), true);
 }
 
 #define IAVF_RX_LEN_MASK 0x80808080
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
index f0a7d19b6a..50228eb112 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
@@ -237,203 +237,4 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
 	*txd_hi |= ((uint64_t)td_cmd) << IAVF_TXD_QW1_CMD_SHIFT;
 }
 
-#ifdef RTE_ARCH_X86
-static __rte_always_inline void
-iavf_rxq_rearm_common(struct ci_rx_queue *rxq, __rte_unused bool avx512)
-{
-	int i;
-	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-
-	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxp,
-				 IAVF_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			__m128i dma_addr0;
-
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IAVF_RXQ_REARM_THRESH;
-		return;
-	}
-
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
-	struct rte_mbuf *mb0, *mb1;
-	__m128i dma_addr0, dma_addr1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-#else
-#ifdef CC_AVX512_SUPPORT
-	if (avx512) {
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-		__m512i dma_addr0_3, dma_addr4_7;
-		__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
-				i += 8, rxp += 8, rxdp += 8) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-			__m256i vaddr0_1, vaddr2_3;
-			__m256i vaddr4_5, vaddr6_7;
-			__m512i vaddr0_3, vaddr4_7;
-
-			mb0 = rxp[0];
-			mb1 = rxp[1];
-			mb2 = rxp[2];
-			mb3 = rxp[3];
-			mb4 = rxp[4];
-			mb5 = rxp[5];
-			mb6 = rxp[6];
-			mb7 = rxp[7];
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-			vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-			vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-			vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-			vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3, and so on.
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-			vaddr4_5 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-							vaddr5, 1);
-			vaddr6_7 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-							vaddr7, 1);
-			vaddr0_3 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-							vaddr2_3, 1);
-			vaddr4_7 =
-				_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-							vaddr6_7, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
-			dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
-
-			/* add headroom to pa values */
-			dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
-			dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
-			_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
-		}
-	} else
-#endif
-	{
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-		/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-		for (i = 0; i < IAVF_RXQ_REARM_THRESH;
-				i += 4, rxp += 4, rxdp += 4) {
-			__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-			__m256i vaddr0_1, vaddr2_3;
-
-			mb0 = rxp[0];
-			mb1 = rxp[1];
-			mb2 = rxp[2];
-			mb3 = rxp[3];
-
-			/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-			RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-					offsetof(struct rte_mbuf, buf_addr) + 8);
-			vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-			vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-			vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-			vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-			/**
-			 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-			 * into the high lanes. Similarly for 2 & 3
-			 */
-			vaddr0_1 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-							vaddr1, 1);
-			vaddr2_3 =
-				_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-							vaddr3, 1);
-
-			/* convert pa to dma_addr hdr/data */
-			dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-			dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-			/* add headroom to pa values */
-			dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-			dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-			/* flush desc with pa dma_addr */
-			_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-			_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-		}
-	}
-
-#endif
-
-	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
-}
-#endif
-
 #endif
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
index e1c8f3c7f9..490028c68a 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
@@ -14,64 +14,12 @@
 #include "iavf_rxtx.h"
 #include "iavf_rxtx_vec_common.h"
 
+#include "../common/rx_vec_neon.h"
+
 static inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
-	struct rte_mbuf **rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	uint64x2_t dma_addr0, dma_addr1;
-	uint64x2_t zero = vdupq_n_u64(0);
-	uint64_t paddr;
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  IAVF_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxep[i] = &rxq->fake_mbuf;
-				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read), zero);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IAVF_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0];
-		mb1 = rxep[1];
-
-		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr0 = vdupq_n_u64(paddr);
-
-		/* flush desc with pa dma_addr */
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr0);
-
-		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr1 = vdupq_n_u64(paddr);
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	rte_io_wmb();
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc));
 }
 
 static inline void
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
index f18dfd636c..3f0ca6cf8e 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
@@ -9,82 +9,14 @@
 #include "iavf.h"
 #include "iavf_rxtx.h"
 #include "iavf_rxtx_vec_common.h"
+#include "../common/rx_vec_sse.h"
 
 #include <rte_vect.h>
 
 static inline void
 iavf_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-
-	volatile union iavf_rx_desc *rxdp;
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	rxdp = IAVF_RX_RING_PTR(rxq, rxq->rxrearm_start);
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp, (void *)rxp,
-				 rxq->rx_free_thresh) < 0) {
-		if (rxq->rxrearm_nb + rxq->rx_free_thresh >= rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
-				rxp[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			rxq->rx_free_thresh;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < rxq->rx_free_thresh; i += 2, rxp += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += rxq->rx_free_thresh;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= rxq->rx_free_thresh;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-		   "rearm_start=%u rearm_nb=%u",
-		   rxq->port_id, rxq->queue_id,
-		   rx_id, rxq->rxrearm_start, rxq->rxrearm_nb);
-
-	/* Update the tail pointer on the NIC */
-	IAVF_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union iavf_rx_desc), false);
 }
 
 static inline void
-- 
2.47.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v1 10/13] net/ixgbe: use common Rx rearm code
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (7 preceding siblings ...)
  2025-05-06 13:27 ` [PATCH v1 09/13] net/iavf: " Anatoly Burakov
@ 2025-05-06 13:27 ` Anatoly Burakov
  2025-05-06 13:28 ` [PATCH v1 11/13] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:27 UTC (permalink / raw)
  To: dev, Vladimir Medvedkin; +Cc: bruce.richardson

The ixgbe driver has implementations of vectorized mbuf rearm code that
are identical to the ones in the common code, so just use those.
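
As a sketch of the end state (mirroring the hunks below), both hooks become
one-line wrappers; only the SSE variant selects an explicit instruction-set
level:

/* NEON build */
static inline void
ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
{
	ci_rxq_rearm(rxq, sizeof(union ixgbe_adv_rx_desc));
}

/* x86 (SSE) build */
static inline void
ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
{
	ci_rxq_rearm(rxq, sizeof(union ixgbe_adv_rx_desc), CI_RX_VEC_LEVEL_SSE);
}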

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/ixgbe/ixgbe_rxtx.h          |  2 +-
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c | 66 +---------------
 drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c  | 75 +------------------
 3 files changed, 7 insertions(+), 136 deletions(-)

diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.h b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
index 84e28eb254..f3dd32b9ff 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.h
@@ -37,7 +37,7 @@
 #define RTE_IXGBE_DESCS_PER_LOOP    4
 
 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
-#define RTE_IXGBE_RXQ_REARM_THRESH      32
+#define RTE_IXGBE_RXQ_REARM_THRESH      CI_VPMD_RX_REARM_THRESH
 #define RTE_IXGBE_MAX_RX_BURST          RTE_IXGBE_RXQ_REARM_THRESH
 #endif
 
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
index 630a2e6a1d..0842f213ef 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -11,72 +11,12 @@
 #include "ixgbe_rxtx.h"
 #include "ixgbe_rxtx_vec_common.h"
 
+#include "../common/rx_vec_neon.h"
+
 static inline void
 ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	uint64x2_t dma_addr0, dma_addr1;
-	uint64x2_t zero = vdupq_n_u64(0);
-	uint64_t paddr;
-	uint8x8_t p;
-
-	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i].read),
-					  zero);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_IXGBE_RXQ_REARM_THRESH;
-		return;
-	}
-
-	p = vld1_u8((uint8_t *)&rxq->mbuf_initializer);
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/*
-		 * Flush mbuf with pkt template.
-		 * Data to be rearmed is 6 bytes long.
-		 */
-		vst1_u8((uint8_t *)&mb0->rearm_data, p);
-		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr0 = vsetq_lane_u64(paddr, zero, 0);
-		/* flush desc with pa dma_addr */
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr0);
-
-		vst1_u8((uint8_t *)&mb1->rearm_data, p);
-		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr1 = vsetq_lane_u64(paddr, zero, 0);
-		vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union ixgbe_adv_rx_desc));
 }
 
 static inline void
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
index ecfb0d6ba6..c6e90b8d41 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -10,83 +10,14 @@
 #include "ixgbe_rxtx.h"
 #include "ixgbe_rxtx_vec_common.h"
 
+#include "../common/rx_vec_sse.h"
+
 #include <rte_vect.h>
 
 static inline void
 ixgbe_rxq_rearm(struct ci_rx_queue *rxq)
 {
-	int i;
-	uint16_t rx_id;
-	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct ci_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
-	struct rte_mbuf *mb0, *mb1;
-	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
-			RTE_PKTMBUF_HEADROOM);
-	__m128i dma_addr0, dma_addr1;
-
-	const __m128i hba_msk = _mm_set_epi64x(0, UINT64_MAX);
-
-	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (rte_mempool_get_bulk(rxq->mp,
-				 (void *)rxep,
-				 RTE_IXGBE_RXQ_REARM_THRESH) < 0) {
-		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
-						dma_addr0);
-			}
-		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_IXGBE_RXQ_REARM_THRESH;
-		return;
-	}
-
-	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i vaddr0, vaddr1;
-
-		mb0 = rxep[0].mbuf;
-		mb1 = rxep[1].mbuf;
-
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr));
-		vaddr1 = _mm_loadu_si128((__m128i *)&(mb1->buf_addr));
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
-
-		/* add headroom to pa values */
-		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
-		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
-
-		/* set Header Buffer Address to zero */
-		dma_addr0 =  _mm_and_si128(dma_addr0, hba_msk);
-		dma_addr1 =  _mm_and_si128(dma_addr1, hba_msk);
-
-		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
-	}
-
-	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;
-
-	rx_id = (uint16_t) ((rxq->rxrearm_start == 0) ?
-			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
+	ci_rxq_rearm(rxq, sizeof(union ixgbe_adv_rx_desc), CI_RX_VEC_LEVEL_SSE);
 }
 
 #ifdef RTE_LIB_SECURITY
-- 
2.47.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v1 11/13] net/intel: support wider x86 vectors for Rx rearm
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (8 preceding siblings ...)
  2025-05-06 13:27 ` [PATCH v1 10/13] net/ixgbe: " Anatoly Burakov
@ 2025-05-06 13:28 ` Anatoly Burakov
  2025-05-06 13:28 ` [PATCH v1 12/13] net/intel: add common Rx mbuf recycle Anatoly Burakov
  2025-05-06 13:28 ` [PATCH v1 13/13] net/intel: add common Tx " Anatoly Burakov
  11 siblings, 0 replies; 13+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:28 UTC (permalink / raw)
  To: dev, Bruce Richardson

Currently, for the 32-byte descriptor format, only the SSE instruction set
is supported. Add implementations for the AVX2 and AVX512 instruction sets.
Like the SSE path, these implementations constant-propagate everything at
compile time and thus should not affect the performance of existing code
paths. To improve code readability and reduce code duplication when
supporting different-sized descriptors, the implementation is also
refactored.
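
The constant propagation hinges on the descriptor geometry being derived
from compile-time constants inside the helpers. A condensed excerpt of the
idea, taken from the refactored AVX2 path below:

/* desc_len is a compile-time constant at every call site, so for 16-byte
 * descriptors the compiler sees desc_per_iter == 4 and for 32-byte
 * descriptors it sees desc_per_iter == 2, and the branch on desc_per_iter
 * in the rearm loop is resolved in each specialization.
 */
const uint8_t desc_per_reg = sizeof(__m256i) / desc_len;
const uint8_t desc_per_iter = desc_per_reg * 2;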

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/rx_vec_sse.h | 380 ++++++++++++++------------
 1 file changed, 205 insertions(+), 175 deletions(-)

diff --git a/drivers/net/intel/common/rx_vec_sse.h b/drivers/net/intel/common/rx_vec_sse.h
index 6fe0baf38b..0aeaac3dc9 100644
--- a/drivers/net/intel/common/rx_vec_sse.h
+++ b/drivers/net/intel/common/rx_vec_sse.h
@@ -48,223 +48,258 @@ _ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq, const size_t desc_len)
 	return 0;
 }
 
-/*
- * SSE code path can handle both 16-byte and 32-byte descriptors with one code
- * path, as we only ever write 16 bytes at a time.
- */
-static __rte_always_inline void
-_ci_rxq_rearm_sse(struct ci_rx_queue *rxq, const size_t desc_len)
+static __rte_always_inline __m128i
+_ci_rxq_rearm_desc_sse(const __m128i vaddr)
 {
 	const __m128i hdr_room = _mm_set1_epi64x(RTE_PKTMBUF_HEADROOM);
 	const __m128i zero = _mm_setzero_si128();
+	__m128i reg;
+
+	/* add headroom to address values */
+	reg = _mm_add_epi64(vaddr, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+	/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+			 offsetof(struct rte_mbuf, buf_addr) + 8);
+	/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
+	reg = _mm_unpackhi_epi64(reg, zero);
+#else
+	/* erase Header Buffer Address */
+	reg = _mm_unpacklo_epi64(reg, zero);
+#endif
+	return reg;
+}
+
+static __rte_always_inline void
+_ci_rxq_rearm_sse(struct ci_rx_queue *rxq, const size_t desc_len)
+{
 	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
 	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint8_t desc_per_reg = 1;
+	const uint8_t desc_per_iter = desc_per_reg * 2;
 	volatile void *rxdp;
 	int i;
 
 	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
-	for (i = 0; i < rearm_thresh; i += 2, rxp += 2, rxdp = RTE_PTR_ADD(rxdp, 2 * desc_len)) {
+	for (i = 0; i < rearm_thresh;
+			i += desc_per_iter,
+			rxp += desc_per_iter,
+			rxdp = RTE_PTR_ADD(rxdp, desc_per_iter * desc_len)) {
 		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
-		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len);
-		__m128i vaddr0, vaddr1;
-		__m128i dma_addr0, dma_addr1;
-		struct rte_mbuf *mb0, *mb1;
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * desc_per_reg);
+		const struct rte_mbuf *mb0 = rxp[0].mbuf;
+		const struct rte_mbuf *mb1 = rxp[1].mbuf;
 
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
+		const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+		const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
 
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-
-		/* add headroom to address values */
-		vaddr0 = _mm_add_epi64(vaddr0, hdr_room);
-		vaddr1 = _mm_add_epi64(vaddr1, hdr_room);
-
-#if RTE_IOVA_IN_MBUF
-		/* move IOVA to Packet Buffer Address, erase Header Buffer Address */
-		dma_addr0 = _mm_unpackhi_epi64(vaddr0, zero);
-		dma_addr1 = _mm_unpackhi_epi64(vaddr1, zero);
-#else
-		/* erase Header Buffer Address */
-		dma_addr0 = _mm_unpacklo_epi64(vaddr0, zero);
-		dma_addr1 = _mm_unpacklo_epi64(vaddr1, zero);
-#endif
+		const __m128i reg0 = _ci_rxq_rearm_desc_sse(vaddr0);
+		const __m128i reg1 = _ci_rxq_rearm_desc_sse(vaddr1);
 
 		/* flush desc with pa dma_addr */
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr0), dma_addr0);
-		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr1), dma_addr1);
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr0), reg0);
+		_mm_store_si128(RTE_CAST_PTR(__m128i *, ptr1), reg1);
 	}
 }
 
 #ifdef __AVX2__
-/* AVX2 version for 16-byte descriptors, handles 4 buffers at a time */
-static __rte_always_inline void
-_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq)
+static __rte_always_inline __m256i
+_ci_rxq_rearm_desc_avx2(const __m128i vaddr0, const __m128i vaddr1)
 {
-	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
-	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
-	const size_t desc_len = 16;
-	volatile void *rxdp;
 	const __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
 	const __m256i zero = _mm256_setzero_si256();
+	__m256i reg;
+
+	/* merge by casting 0 to 256-bit and inserting 1 into the high lanes */
+	reg =
+		_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+					vaddr1, 1);
+
+	/* add headroom to address values */
+	reg = _mm256_add_epi64(reg, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+	/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+			offsetof(struct rte_mbuf, buf_addr) + 8);
+	/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+	reg = _mm256_unpackhi_epi64(reg, zero);
+#else
+	/* erase Header Buffer Address */
+	reg = _mm256_unpacklo_epi64(reg, zero);
+#endif
+	return reg;
+}
+
+static __rte_always_inline void
+_ci_rxq_rearm_avx2(struct ci_rx_queue *rxq, const size_t desc_len)
+{
+	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
+	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
+	/* how many descriptors can fit into a register */
+	const uint8_t desc_per_reg = sizeof(__m256i) / desc_len;
+	/* how many descriptors can fit into one loop iteration */
+	const uint8_t desc_per_iter = desc_per_reg * 2;
+	volatile void *rxdp;
 	int i;
 
 	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
 
-	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < rearm_thresh; i += 4, rxp += 4, rxdp = RTE_PTR_ADD(rxdp, 4 * desc_len)) {
+	/* Initialize the mbufs in vector, process 2 or 4 mbufs in one loop */
+	for (i = 0; i < rearm_thresh;
+			i += desc_per_iter,
+			rxp += desc_per_iter,
+			rxdp = RTE_PTR_ADD(rxdp, desc_per_iter * desc_len)) {
 		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
-		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 2);
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m256i vaddr0_1, vaddr2_3;
-		__m256i dma_addr0_1, dma_addr2_3;
-		struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * desc_per_reg);
+		__m256i reg0, reg1;
 
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-		mb2 = rxp[2].mbuf;
-		mb3 = rxp[3].mbuf;
+		if (desc_per_iter == 2) {
+			/* 16 byte descriptor, 16 byte zero, times two */
+			const __m128i zero = _mm_setzero_si128();
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
 
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
 
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3
-		 */
-		vaddr0_1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-						vaddr1, 1);
-		vaddr2_3 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-						vaddr3, 1);
+			reg0 = _ci_rxq_rearm_desc_avx2(vaddr0, zero);
+			reg1 = _ci_rxq_rearm_desc_avx2(vaddr1, zero);
+		} else {
+			/* 16 byte descriptor times four */
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
+			const struct rte_mbuf *mb2 = rxp[2].mbuf;
+			const struct rte_mbuf *mb3 = rxp[3].mbuf;
 
-		/* add headroom to address values */
-		vaddr0_1 = _mm256_add_epi64(vaddr0_1, hdr_room);
-		vaddr0_1 = _mm256_add_epi64(vaddr0_1, hdr_room);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+			const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+			const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
 
-#if RTE_IOVA_IN_MBUF
-		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
-		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, zero);
-		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, zero);
-#else
-		/* erase Header Buffer Address */
-		dma_addr0_1 = _mm256_unpacklo_epi64(vaddr0_1, zero);
-		dma_addr2_3 = _mm256_unpacklo_epi64(vaddr2_3, zero);
-#endif
+			reg0 = _ci_rxq_rearm_desc_avx2(vaddr0, vaddr1);
+			reg1 = _ci_rxq_rearm_desc_avx2(vaddr2, vaddr3);
+		}
 
 		/* flush desc with pa dma_addr */
-		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr0), dma_addr0_1);
-		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr1), dma_addr2_3);
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr0), reg0);
+		_mm256_store_si256(RTE_CAST_PTR(__m256i *, ptr1), reg1);
 	}
 }
 #endif /* __AVX2__ */
 
 #ifdef __AVX512VL__
-/* AVX512 version for 16-byte descriptors, handles 8 buffers at a time */
+static __rte_always_inline __m512i
+_ci_rxq_rearm_desc_avx512(const __m128i vaddr0, const __m128i vaddr1,
+		const __m128i vaddr2, const __m128i vaddr3)
+{
+	const __m512i zero = _mm512_setzero_si512();
+	const __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+	__m256i vaddr0_1, vaddr2_3;
+	__m512i reg;
+
+	/**
+	 * merge 0 & 1, by casting 0 to 256-bit and inserting 1 into the high
+	 * lanes. Similarly for 2 & 3.
+	 */
+	vaddr0_1 =
+		_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+					vaddr1, 1);
+	vaddr2_3 =
+		_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+					vaddr3, 1);
+	/*
+	 * merge 0+1 & 2+3, by casting 0+1 to 512-bit and inserting 2+3 into the
+	 * high lanes.
+	 */
+	reg =
+		_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+					vaddr2_3, 1);
+
+	/* add headroom to address values */
+	reg = _mm512_add_epi64(reg, hdr_room);
+
+#if RTE_IOVA_IN_MBUF
+	/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+			offsetof(struct rte_mbuf, buf_addr) + 8);
+	/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
+	reg = _mm512_unpackhi_epi64(reg, zero);
+#else
+	/* erase Header Buffer Address */
+	reg = _mm512_unpacklo_epi64(reg, zero);
+#endif
+	return reg;
+}
+
 static __rte_always_inline void
-_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq)
+_ci_rxq_rearm_avx512(struct ci_rx_queue *rxq, const size_t desc_len)
 {
 	struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start];
 	const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH;
-	const size_t desc_len = 16;
+	/* how many descriptors can fit into a register */
+	const uint8_t desc_per_reg = sizeof(__m512i) / desc_len;
+	/* how many descriptors can fit into one loop iteration */
+	const uint8_t desc_per_iter = desc_per_reg * 2;
 	volatile void *rxdp;
 	int i;
-	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-	struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
-	__m512i dma_addr0_3, dma_addr4_7;
-	__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
-	__m512i zero = _mm512_setzero_si512();
 
 	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
 
-	/* Initialize the mbufs in vector, process 8 mbufs in one loop */
-	for (i = 0; i < rearm_thresh; i += 8, rxp += 8, rxdp = RTE_PTR_ADD(rxdp, 8 * desc_len)) {
+	/* Initialize the mbufs in vector, process 4 or 8 mbufs in one loop */
+	for (i = 0; i < rearm_thresh;
+			i += desc_per_iter,
+			rxp += desc_per_iter,
+			rxdp = RTE_PTR_ADD(rxdp, desc_per_iter * desc_len)) {
 		volatile void *ptr0 = RTE_PTR_ADD(rxdp, 0);
-		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * 4);
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m128i vaddr4, vaddr5, vaddr6, vaddr7;
-		__m256i vaddr0_1, vaddr2_3;
-		__m256i vaddr4_5, vaddr6_7;
-		__m512i vaddr0_3, vaddr4_7;
+		volatile void *ptr1 = RTE_PTR_ADD(rxdp, desc_len * desc_per_reg);
+		__m512i reg0, reg1;
 
-		mb0 = rxp[0].mbuf;
-		mb1 = rxp[1].mbuf;
-		mb2 = rxp[2].mbuf;
-		mb3 = rxp[3].mbuf;
-		mb4 = rxp[4].mbuf;
-		mb5 = rxp[5].mbuf;
-		mb6 = rxp[6].mbuf;
-		mb7 = rxp[7].mbuf;
+		if (desc_per_iter == 4) {
+			/* 16-byte descriptor, 16 byte zero, times four */
+			const __m128i zero = _mm_setzero_si128();
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
+			const struct rte_mbuf *mb2 = rxp[2].mbuf;
+			const struct rte_mbuf *mb3 = rxp[3].mbuf;
 
-#if RTE_IOVA_IN_MBUF
-		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-#endif
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-		vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
-		vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
-		vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
-		vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+			const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+			const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
 
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3, and so on.
-		 */
-		vaddr0_1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-						vaddr1, 1);
-		vaddr2_3 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-						vaddr3, 1);
-		vaddr4_5 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
-						vaddr5, 1);
-		vaddr6_7 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
-						vaddr7, 1);
-		vaddr0_3 =
-			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
-						vaddr2_3, 1);
-		vaddr4_7 =
-			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
-						vaddr6_7, 1);
+			reg0 = _ci_rxq_rearm_desc_avx512(vaddr0, zero, vaddr1, zero);
+			reg1 = _ci_rxq_rearm_desc_avx512(vaddr2, zero, vaddr3, zero);
+		} else {
+			/* 16-byte descriptor times eight */
+			const struct rte_mbuf *mb0 = rxp[0].mbuf;
+			const struct rte_mbuf *mb1 = rxp[1].mbuf;
+			const struct rte_mbuf *mb2 = rxp[2].mbuf;
+			const struct rte_mbuf *mb3 = rxp[3].mbuf;
+			const struct rte_mbuf *mb4 = rxp[4].mbuf;
+			const struct rte_mbuf *mb5 = rxp[5].mbuf;
+			const struct rte_mbuf *mb6 = rxp[6].mbuf;
+			const struct rte_mbuf *mb7 = rxp[7].mbuf;
 
-		/* add headroom to address values */
-		vaddr0_3 = _mm512_add_epi64(vaddr0_3, hdr_room);
-		dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+			const __m128i vaddr0 = _mm_loadu_si128((const __m128i *)&mb0->buf_addr);
+			const __m128i vaddr1 = _mm_loadu_si128((const __m128i *)&mb1->buf_addr);
+			const __m128i vaddr2 = _mm_loadu_si128((const __m128i *)&mb2->buf_addr);
+			const __m128i vaddr3 = _mm_loadu_si128((const __m128i *)&mb3->buf_addr);
+			const __m128i vaddr4 = _mm_loadu_si128((const __m128i *)&mb4->buf_addr);
+			const __m128i vaddr5 = _mm_loadu_si128((const __m128i *)&mb5->buf_addr);
+			const __m128i vaddr6 = _mm_loadu_si128((const __m128i *)&mb6->buf_addr);
+			const __m128i vaddr7 = _mm_loadu_si128((const __m128i *)&mb7->buf_addr);
 
-#if RTE_IOVA_IN_MBUF
-		/* extract IOVA addr into Packet Buffer Address, erase Header Buffer Address */
-		dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, zero);
-		dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, zero);
-#else
-		/* erase Header Buffer Address */
-		dma_addr0_3 = _mm512_unpacklo_epi64(vaddr0_3, zero);
-		dma_addr4_7 = _mm512_unpacklo_epi64(vaddr4_7, zero);
-#endif
+			reg0 = _ci_rxq_rearm_desc_avx512(vaddr0, vaddr1, vaddr2, vaddr3);
+			reg1 = _ci_rxq_rearm_desc_avx512(vaddr4, vaddr5, vaddr6, vaddr7);
+		}
 
 		/* flush desc with pa dma_addr */
-		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr0), dma_addr0_3);
-		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr1), dma_addr4_7);
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr0), reg0);
+		_mm512_store_si512(RTE_CAST_PTR(__m512i *, ptr1), reg1);
 	}
 }
 #endif /* __AVX512VL__ */
@@ -280,31 +315,26 @@ ci_rxq_rearm(struct ci_rx_queue *rxq, const size_t desc_len,
 	if (_ci_rxq_rearm_get_bufs(rxq, desc_len) < 0)
 		return;
 
-	if (desc_len == 16) {
-		switch (vec_level) {
-		case CI_RX_VEC_LEVEL_AVX512:
+	switch (vec_level) {
+	case CI_RX_VEC_LEVEL_AVX512:
 #ifdef __AVX512VL__
-			_ci_rxq_rearm_avx512(rxq);
-			break;
+		_ci_rxq_rearm_avx512(rxq, desc_len);
+		break;
 #else
-			/* fall back to AVX2 unless requested not to */
-			/* fall through */
+		/* fall back to AVX2 unless requested not to */
+		/* fall through */
 #endif
-		case CI_RX_VEC_LEVEL_AVX2:
+	case CI_RX_VEC_LEVEL_AVX2:
 #ifdef __AVX2__
-			_ci_rxq_rearm_avx2(rxq);
+			_ci_rxq_rearm_avx2(rxq, desc_len);
 			break;
 #else
 			/* fall back to SSE if AVX2 isn't supported */
 			/* fall through */
 #endif
-		case CI_RX_VEC_LEVEL_SSE:
-			_ci_rxq_rearm_sse(rxq, desc_len);
-			break;
-		}
-	} else {
-		/* for 32-byte descriptors only support SSE */
+	case CI_RX_VEC_LEVEL_SSE:
 		_ci_rxq_rearm_sse(rxq, desc_len);
+		break;
 	}
 
 	rxq->rxrearm_start += rearm_thresh;
-- 
2.47.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v1 12/13] net/intel: add common Rx mbuf recycle
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (9 preceding siblings ...)
  2025-05-06 13:28 ` [PATCH v1 11/13] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
@ 2025-05-06 13:28 ` Anatoly Burakov
  2025-05-06 13:28 ` [PATCH v1 13/13] net/intel: add common Tx " Anatoly Burakov
  11 siblings, 0 replies; 13+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:28 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes, Vladimir Medvedkin

Currently, there are duplicate implementations of Rx mbuf recycle in some
drivers, specifically ixgbe and i40e. Move them into a common header.

While we're at it, also add support for the no-IOVA-in-mbuf case.
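
A condensed sketch of the per-descriptor refill step from the new helper
below; with no IOVA in the mbuf, the descriptor address is taken from the
mbuf's virtual buffer address instead of its IOVA:

#if RTE_IOVA_IN_MBUF
	const uint64_t addr = rxep[i].mbuf->buf_iova + RTE_PKTMBUF_HEADROOM;
#else
	const uint64_t addr = (uintptr_t)rxep[i].mbuf->buf_addr + RTE_PKTMBUF_HEADROOM;
#endif
	/* 8 bytes packet buffer address followed by 8 bytes header buffer address */
	*(cur + 1) = 0;
	*cur = rte_cpu_to_le_64(addr);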

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/recycle_mbufs.h      | 67 +++++++++++++++++++
 .../i40e/i40e_recycle_mbufs_vec_common.c      | 37 +---------
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    | 35 +---------
 3 files changed, 73 insertions(+), 66 deletions(-)
 create mode 100644 drivers/net/intel/common/recycle_mbufs.h

diff --git a/drivers/net/intel/common/recycle_mbufs.h b/drivers/net/intel/common/recycle_mbufs.h
new file mode 100644
index 0000000000..fd31c5c1ff
--- /dev/null
+++ b/drivers/net/intel/common/recycle_mbufs.h
@@ -0,0 +1,67 @@
+#ifndef _COMMON_INTEL_RECYCLE_MBUFS_H_
+#define _COMMON_INTEL_RECYCLE_MBUFS_H_
+
+#include <stdint.h>
+#include <unistd.h>
+
+#include <rte_mbuf.h>
+#include <rte_io.h>
+#include <ethdev_driver.h>
+
+#include "rx.h"
+#include "tx.h"
+
+/**
+ * Recycle mbufs for Rx queue.
+ *
+ * @param rxq Rx queue pointer
+ * @param nb_mbufs number of mbufs to recycle
+ * @param desc_len length of Rx descriptor
+ */
+static __rte_always_inline void
+ci_rx_recycle_mbufs(struct ci_rx_queue *rxq, const uint16_t nb_mbufs,
+		const size_t desc_len)
+{
+	struct ci_rx_entry *rxep;
+	volatile void *rxdp;
+	uint16_t rx_id;
+	uint16_t i;
+
+	rxdp = RTE_PTR_ADD(rxq->rx_ring, rxq->rxrearm_start * desc_len);
+	rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	for (i = 0; i < nb_mbufs; i++) {
+		volatile uint64_t *cur = RTE_PTR_ADD(rxdp, i * desc_len);
+
+#if RTE_IOVA_IN_MBUF
+		const uint64_t paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
+		const uint64_t dma_addr = rte_cpu_to_le_64(paddr);
+#else
+		const uint64_t vaddr = (uintptr_t)rxep[i].mbuf->buf_addr +
+			RTE_PKTMBUF_HEADROOM;
+		const uint64_t dma_addr = rte_cpu_to_le_64(vaddr);
+#endif
+
+		/* 8 bytes PBA followed by 8 bytes HBA */
+		*(cur + 1) = 0;
+		*cur = dma_addr;
+	}
+
+	/* Update the descriptor initializer index */
+	rxq->rxrearm_start += nb_mbufs;
+	rx_id = rxq->rxrearm_start - 1;
+
+	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+		rxq->rxrearm_start = 0;
+		rx_id = rxq->nb_rx_desc - 1;
+	}
+
+	rxq->rxrearm_nb -= nb_mbufs;
+
+	rte_io_wmb();
+
+	/* Update the tail pointer on the NIC */
+	rte_write32_wc_relaxed(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
+}
+
+#endif
diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
index aa7703216d..073357bee2 100644
--- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
@@ -10,43 +10,12 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 
+#include "../common/recycle_mbufs.h"
+
 void
 i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct ci_rx_queue *rxq = rx_queue;
-	struct ci_rx_entry *rxep;
-	volatile union i40e_rx_desc *rxdp;
-	uint16_t rx_id;
-	uint64_t paddr;
-	uint64_t dma_addr;
-	uint16_t i;
-
-	rxdp = I40E_RX_RING_PTR(rxq, rxq->rxrearm_start);
-	rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	for (i = 0; i < nb_mbufs; i++) {
-		/* Initialize rxdp descs. */
-		paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr = rte_cpu_to_le_64(paddr);
-		/* flush desc with pa dma_addr */
-		rxdp[i].read.hdr_addr = 0;
-		rxdp[i].read.pkt_addr = dma_addr;
-	}
-
-	/* Update the descriptor initializer index */
-	rxq->rxrearm_start += nb_mbufs;
-	rx_id = rxq->rxrearm_start - 1;
-
-	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
-		rxq->rxrearm_start = 0;
-		rx_id = rxq->nb_rx_desc - 1;
-	}
-
-	rxq->rxrearm_nb -= nb_mbufs;
-
-	rte_io_wmb();
-	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	ci_rx_recycle_mbufs(rx_queue, nb_mbufs, sizeof(union i40e_rx_desc));
 }
 
 uint16_t
diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index 1df1787c7f..e2c3523ed2 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -8,41 +8,12 @@
 #include "ixgbe_ethdev.h"
 #include "ixgbe_rxtx.h"
 
+#include "../common/recycle_mbufs.h"
+
 void
 ixgbe_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
 {
-	struct ci_rx_queue *rxq = rx_queue;
-	struct ci_rx_entry *rxep;
-	volatile union ixgbe_adv_rx_desc *rxdp;
-	uint16_t rx_id;
-	uint64_t paddr;
-	uint64_t dma_addr;
-	uint16_t i;
-
-	rxdp = rxq->ixgbe_rx_ring + rxq->rxrearm_start;
-	rxep = &rxq->sw_ring[rxq->rxrearm_start];
-
-	for (i = 0; i < nb_mbufs; i++) {
-		/* Initialize rxdp descs. */
-		paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
-		dma_addr = rte_cpu_to_le_64(paddr);
-		/* Flush descriptors with pa dma_addr */
-		rxdp[i].read.hdr_addr = 0;
-		rxdp[i].read.pkt_addr = dma_addr;
-	}
-
-	/* Update the descriptor initializer index */
-	rxq->rxrearm_start += nb_mbufs;
-	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
-		rxq->rxrearm_start = 0;
-
-	rxq->rxrearm_nb -= nb_mbufs;
-
-	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
-			(rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
-
-	/* Update the tail pointer on the NIC */
-	IXGBE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	ci_rx_recycle_mbufs(rx_queue, nb_mbufs, sizeof(union ixgbe_adv_rx_desc));
 }
 
 uint16_t
-- 
2.47.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v1 13/13] net/intel: add common Tx mbuf recycle
  2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
                   ` (10 preceding siblings ...)
  2025-05-06 13:28 ` [PATCH v1 12/13] net/intel: add common Rx mbuf recycle Anatoly Burakov
@ 2025-05-06 13:28 ` Anatoly Burakov
  11 siblings, 0 replies; 13+ messages in thread
From: Anatoly Burakov @ 2025-05-06 13:28 UTC (permalink / raw)
  To: dev, Bruce Richardson, Ian Stokes, Vladimir Medvedkin

Currently, there are duplicate implementations of Tx mbuf recycle in some
drivers, specifically ixgbe and i40e. Move them into a common header.
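
The common helper deliberately leaves the descriptor-done check to the
caller. A hypothetical driver-side wrapper (function names here are
illustrative, not taken from the patch) would look roughly like this:

static uint16_t
xyz_recycle_tx_mbufs_reuse(void *tx_queue,
	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
{
	struct ci_tx_queue *txq = tx_queue;

	/* driver-specific: only recycle once the DD bit is set on the
	 * descriptor batch about to be freed.
	 */
	if (!xyz_tx_desc_done(txq, txq->tx_next_dd))
		return 0;

	return ci_tx_recycle_mbufs(txq, recycle_rxq_info);
}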

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/common/recycle_mbufs.h      | 98 +++++++++++++++++++
 drivers/net/intel/common/tx.h                 |  1 +
 .../i40e/i40e_recycle_mbufs_vec_common.c      | 88 +----------------
 .../ixgbe/ixgbe_recycle_mbufs_vec_common.c    | 89 +----------------
 4 files changed, 107 insertions(+), 169 deletions(-)

diff --git a/drivers/net/intel/common/recycle_mbufs.h b/drivers/net/intel/common/recycle_mbufs.h
index fd31c5c1ff..88779c5aa4 100644
--- a/drivers/net/intel/common/recycle_mbufs.h
+++ b/drivers/net/intel/common/recycle_mbufs.h
@@ -64,4 +64,102 @@ ci_rx_recycle_mbufs(struct ci_rx_queue *rxq, const uint16_t nb_mbufs,
 	rte_write32_wc_relaxed(rte_cpu_to_le_32(rx_id), rxq->qrx_tail);
 }
 
+/**
+ * Recycle buffers on Tx. Note: the function must first perform a driver-specific
+ * DD-bit-set check to ensure that the Tx descriptors are ready for recycling.
+ *
+ * @param txq Tx queue pointer
+ * @param recycle_rxq_info recycling mbuf information
+ *
+ * @return how many buffers were recycled
+ */
+static __rte_always_inline uint16_t
+ci_tx_recycle_mbufs(struct ci_tx_queue *txq,
+	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+	struct ci_tx_entry *txep;
+	struct rte_mbuf **rxep;
+	int i, n;
+	uint16_t nb_recycle_mbufs;
+	uint16_t avail = 0;
+	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
+	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
+	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
+	uint16_t refill_head = *recycle_rxq_info->refill_head;
+	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+
+	/* Get available recycling Rx buffers. */
+	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
+
+	/* Check Tx free thresh and Rx available space. */
+	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
+		return 0;
+
+	n = txq->tx_rs_thresh;
+	nb_recycle_mbufs = n;
+
+	/* Mbuf recycle mode does not support wrapping around the ring buffer.
+	 * There are two cases:
+	 *
+	 * case 1: the refill head of the Rx buffer ring must stay aligned with
+	 * the mbuf ring size. In this case, the number of Tx buffers being
+	 * freed must equal refill_requirement.
+	 *
+	 * case 2: the refill head of the Rx buffer ring does not need to stay
+	 * aligned with the mbuf ring size. In this case, the refill head update
+	 * must not exceed the Rx mbuf ring size.
+	 */
+	if ((refill_requirement && refill_requirement != n) ||
+		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
+		return 0;
+
+	/* First buffer to free from S/W ring is at index
+	 * tx_next_dd - (tx_rs_thresh-1).
+	 */
+	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+	rxep = recycle_rxq_info->mbuf_ring;
+	rxep += refill_head;
+
+	/* is fast-free enabled in offloads? */
+	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+		/* Avoid txq containing buffers from unexpected mempool. */
+		if (unlikely(recycle_rxq_info->mp
+					!= txep[0].mbuf->pool))
+			return 0;
+
+		/* Directly put mbufs from Tx to Rx. */
+		for (i = 0; i < n; i++)
+			rxep[i] = txep[i].mbuf;
+	} else {
+		for (i = 0; i < n; i++) {
+			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+
+			/* If a Tx buffer is not the last reference, or comes
+			 * from an unexpected mempool, all previously copied
+			 * buffers are considered invalid.
+			 */
+			if (unlikely(rxep[i] == NULL ||
+				recycle_rxq_info->mp != txep[i].mbuf->pool))
+				nb_recycle_mbufs = 0;
+		}
+		/* If any Tx buffer was not the last reference or came from an
+		 * unexpected mempool, return all collected buffers to their
+		 * mempools instead of recycling them.
+		 */
+		if (nb_recycle_mbufs == 0)
+			for (i = 0; i < n; i++) {
+				if (rxep[i] != NULL)
+					rte_mempool_put(rxep[i]->pool, rxep[i]);
+			}
+	}
+
+	/* Update counters for Tx. */
+	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+	if (txq->tx_next_dd >= txq->nb_tx_desc)
+		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+	return nb_recycle_mbufs;
+}
+
 #endif
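For clarity, here is a small self-contained sketch of the availability and
no-wrap arithmetic used by ci_tx_recycle_mbufs() above, with made-up values
(the mbuf ring size is assumed to be a power of two, which is what makes the
mask-based modulo valid):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    	uint16_t mbuf_ring_size = 4096;   /* hypothetical Rx mbuf ring size */
    	uint16_t mask = mbuf_ring_size - 1;
    	uint16_t refill_head = 100, receive_tail = 80;
    	uint16_t n = 32;                  /* tx_rs_thresh */
    	uint16_t refill_requirement = 0;  /* "case 2": no alignment needed */

    	/* free slots between the Rx refill head and the receive tail */
    	uint16_t avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
    	printf("avail = %u\n", (unsigned)avail);   /* prints 4076 */

    	/* the no-wrap check: refill_head + n = 132 <= 4096, so recycling
    	 * of n buffers can go ahead without wrapping the mbuf ring
    	 */
    	if ((refill_requirement && refill_requirement != n) ||
    			(!refill_requirement && (refill_head + n > mbuf_ring_size)))
    		printf("skip recycling\n");
    	else
    		printf("recycle %u mbufs\n", (unsigned)n);
    	return 0;
    }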
diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index c99bd5420f..cc70fa7db4 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -37,6 +37,7 @@ struct ci_tx_queue {
 		volatile struct ice_tx_desc *ice_tx_ring;
 		volatile struct idpf_base_tx_desc *idpf_tx_ring;
 		volatile union ixgbe_adv_tx_desc *ixgbe_tx_ring;
+		volatile void *tx_ring; /**< Generic. */
 	};
 	volatile uint8_t *qtx_tail;               /* register address of tail */
 	union {
diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
index 073357bee2..19edee781d 100644
--- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
@@ -23,92 +23,12 @@ i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
 	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
 	struct ci_tx_queue *txq = tx_queue;
-	struct ci_tx_entry *txep;
-	struct rte_mbuf **rxep;
-	int i, n;
-	uint16_t nb_recycle_mbufs;
-	uint16_t avail = 0;
-	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
-	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
-	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
-	uint16_t refill_head = *recycle_rxq_info->refill_head;
-	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+	const uint64_t ctob = txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz;
 
-	/* Get available recycling Rx buffers. */
-	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
-
-	/* Check Tx free thresh and Rx available space. */
-	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
-		return 0;
-
-	/* check DD bits on threshold descriptor */
-	if ((txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
-				rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+	/* are Tx descriptors ready for recycling? */
+	if ((ctob & rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
 			rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
 		return 0;
 
-	n = txq->tx_rs_thresh;
-	nb_recycle_mbufs = n;
-
-	/* Mbufs recycle mode can only support no ring buffer wrapping around.
-	 * Two case for this:
-	 *
-	 * case 1: The refill head of Rx buffer ring needs to be aligned with
-	 * mbuf ring size. In this case, the number of Tx freeing buffers
-	 * should be equal to refill_requirement.
-	 *
-	 * case 2: The refill head of Rx ring buffer does not need to be aligned
-	 * with mbuf ring size. In this case, the update of refill head can not
-	 * exceed the Rx mbuf ring size.
-	 */
-	if ((refill_requirement && refill_requirement != n) ||
-		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
-		return 0;
-
-	/* First buffer to free from S/W ring is at index
-	 * tx_next_dd - (tx_rs_thresh-1).
-	 */
-	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
-	rxep = recycle_rxq_info->mbuf_ring;
-	rxep += refill_head;
-
-	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
-		/* Avoid txq contains buffers from unexpected mempool. */
-		if (unlikely(recycle_rxq_info->mp
-					!= txep[0].mbuf->pool))
-			return 0;
-
-		/* Directly put mbufs from Tx to Rx. */
-		for (i = 0; i < n; i++)
-			rxep[i] = txep[i].mbuf;
-	} else {
-		for (i = 0; i < n; i++) {
-			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
-
-			/* If Tx buffers are not the last reference or from
-			 * unexpected mempool, previous copied buffers are
-			 * considered as invalid.
-			 */
-			if (unlikely(rxep[i] == NULL ||
-				recycle_rxq_info->mp != txep[i].mbuf->pool))
-				nb_recycle_mbufs = 0;
-		}
-		/* If Tx buffers are not the last reference or
-		 * from unexpected mempool, all recycled buffers
-		 * are put into mempool.
-		 */
-		if (nb_recycle_mbufs == 0)
-			for (i = 0; i < n; i++) {
-				if (rxep[i] != NULL)
-					rte_mempool_put(rxep[i]->pool, rxep[i]);
-			}
-	}
-
-	/* Update counters for Tx. */
-	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
-	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
-	if (txq->tx_next_dd >= txq->nb_tx_desc)
-		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-
-	return nb_recycle_mbufs;
+	return ci_tx_recycle_mbufs(txq, recycle_rxq_info);
 }
diff --git a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
index e2c3523ed2..179205b422 100644
--- a/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_recycle_mbufs_vec_common.c
@@ -21,92 +21,11 @@ ixgbe_recycle_tx_mbufs_reuse_vec(void *tx_queue,
 		struct rte_eth_recycle_rxq_info *recycle_rxq_info)
 {
 	struct ci_tx_queue *txq = tx_queue;
-	struct ci_tx_entry *txep;
-	struct rte_mbuf **rxep;
-	int i, n;
-	uint32_t status;
-	uint16_t nb_recycle_mbufs;
-	uint16_t avail = 0;
-	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
-	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
-	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
-	uint16_t refill_head = *recycle_rxq_info->refill_head;
-	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+	const uint32_t status = txq->ixgbe_tx_ring[txq->tx_next_dd].wb.status;
 
-	/* Get available recycling Rx buffers. */
-	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
-
-	/* Check Tx free thresh and Rx available space. */
-	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
-		return 0;
-
-	/* check DD bits on threshold descriptor */
-	status = txq->ixgbe_tx_ring[txq->tx_next_dd].wb.status;
-	if (!(status & IXGBE_ADVTXD_STAT_DD))
-		return 0;
-
-	n = txq->tx_rs_thresh;
-	nb_recycle_mbufs = n;
-
-	/* Mbufs recycle can only support no ring buffer wrapping around.
-	 * Two case for this:
-	 *
-	 * case 1: The refill head of Rx buffer ring needs to be aligned with
-	 * buffer ring size. In this case, the number of Tx freeing buffers
-	 * should be equal to refill_requirement.
-	 *
-	 * case 2: The refill head of Rx ring buffer does not need to be aligned
-	 * with buffer ring size. In this case, the update of refill head can not
-	 * exceed the Rx buffer ring size.
-	 */
-	if ((refill_requirement && refill_requirement != n) ||
-		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
+	/* are Tx descriptors ready for recycling? */
+	if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
 		return 0;
 
-	/* First buffer to free from S/W ring is at index
-	 * tx_next_dd - (tx_rs_thresh-1).
-	 */
-	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
-	rxep = recycle_rxq_info->mbuf_ring;
-	rxep += refill_head;
-
-	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
-		/* Avoid txq contains buffers from unexpected mempool. */
-		if (unlikely(recycle_rxq_info->mp
-					!= txep[0].mbuf->pool))
-			return 0;
-
-		/* Directly put mbufs from Tx to Rx. */
-		for (i = 0; i < n; i++)
-			rxep[i] = txep[i].mbuf;
-	} else {
-		for (i = 0; i < n; i++) {
-			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
-
-			/* If Tx buffers are not the last reference or from
-			 * unexpected mempool, previous copied buffers are
-			 * considered as invalid.
-			 */
-			if (unlikely(rxep[i] == NULL ||
-				recycle_rxq_info->mp != txep[i].mbuf->pool))
-				nb_recycle_mbufs = 0;
-		}
-		/* If Tx buffers are not the last reference or
-		 * from unexpected mempool, all recycled buffers
-		 * are put into mempool.
-		 */
-		if (nb_recycle_mbufs == 0)
-			for (i = 0; i < n; i++) {
-				if (rxep[i] != NULL)
-					rte_mempool_put(rxep[i]->pool, rxep[i]);
-			}
-	}
-
-	/* Update counters for Tx. */
-	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
-	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
-	if (txq->tx_next_dd >= txq->nb_tx_desc)
-		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-
-	return nb_recycle_mbufs;
+	return ci_tx_recycle_mbufs(txq, recycle_rxq_info);
 }
-- 
2.47.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2025-05-06 13:29 UTC | newest]

Thread overview: 13+ messages
2025-05-06 13:27 [PATCH v1 01/13] net/ixgbe: remove unused field in Rx queue struct Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 02/13] net/iavf: make IPsec stats dynamically allocated Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 03/13] net/ixgbe: create common Rx queue structure Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 04/13] net/i40e: use the " Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 05/13] net/ice: " Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 06/13] net/iavf: " Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 07/13] net/intel: generalize vectorized Rx rearm Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 08/13] net/i40e: use common Rx rearm code Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 09/13] net/iavf: " Anatoly Burakov
2025-05-06 13:27 ` [PATCH v1 10/13] net/ixgbe: " Anatoly Burakov
2025-05-06 13:28 ` [PATCH v1 11/13] net/intel: support wider x86 vectors for Rx rearm Anatoly Burakov
2025-05-06 13:28 ` [PATCH v1 12/13] net/intel: add common Rx mbuf recycle Anatoly Burakov
2025-05-06 13:28 ` [PATCH v1 13/13] net/intel: add common Tx " Anatoly Burakov
