* [RFC PATCH 01/27] net/intel: create common Tx descriptor structure
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 02/27] net/intel: use common tx ring structure Bruce Richardson
` (25 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev
Cc: Bruce Richardson, Praveen Shetty, Vladimir Medvedkin,
Anatoly Burakov, Jingjing Wu
The Tx descriptors used by the i40e, iavf, ice and idpf drivers are all
identical 16-byte descriptors, so define a common struct for them. Since
the original struct definitions are in base code, leave them in place, but
only use the new struct in DPDK code.
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
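As an aside, here is a minimal, illustrative sketch (not part of the patch) of how a
driver populates one of the new common descriptors. It follows the pattern of the
existing tx1()/vtx1() helpers changed below; the I40E_* macro names are the i40e ones
and are used here purely as an example:

/* illustration only: fill one ci_tx_desc from an mbuf, as tx1() does today */
static inline void
example_fill_one_desc(volatile struct ci_tx_desc *txdp, struct rte_mbuf *pkt)
{
	uint64_t dma_addr = rte_mbuf_data_iova(pkt);

	/* qword 0: buffer address; qword 1: dtype, command flags and buffer size */
	txdp->buffer_addr = rte_cpu_to_le_64(dma_addr);
	txdp->cmd_type_offset_bsz = rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DATA |
			((uint64_t)I40E_TD_CMD << I40E_TXD_QW1_CMD_SHIFT) |
			((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));
}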
drivers/net/intel/common/tx.h | 16 ++++++---
drivers/net/intel/cpfl/cpfl_rxtx.c | 2 +-
drivers/net/intel/i40e/i40e_fdir.c | 4 +--
drivers/net/intel/i40e/i40e_rxtx.c | 26 +++++++-------
.../net/intel/i40e/i40e_rxtx_vec_altivec.c | 6 ++--
drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c | 6 ++--
drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c | 6 ++--
drivers/net/intel/i40e/i40e_rxtx_vec_neon.c | 6 ++--
drivers/net/intel/i40e/i40e_rxtx_vec_sse.c | 6 ++--
drivers/net/intel/iavf/iavf_rxtx.c | 16 ++++-----
drivers/net/intel/iavf/iavf_rxtx.h | 2 +-
drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c | 6 ++--
drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c | 12 +++----
drivers/net/intel/iavf/iavf_rxtx_vec_sse.c | 6 ++--
drivers/net/intel/ice/ice_dcf_ethdev.c | 2 +-
drivers/net/intel/ice/ice_rxtx.c | 36 +++++++++----------
drivers/net/intel/ice/ice_rxtx_vec_avx2.c | 6 ++--
drivers/net/intel/ice/ice_rxtx_vec_avx512.c | 6 ++--
drivers/net/intel/ice/ice_rxtx_vec_sse.c | 6 ++--
drivers/net/intel/idpf/idpf_common_rxtx.c | 20 +++++------
drivers/net/intel/idpf/idpf_common_rxtx.h | 2 +-
.../net/intel/idpf/idpf_common_rxtx_avx2.c | 8 ++---
.../net/intel/idpf/idpf_common_rxtx_avx512.c | 8 ++---
drivers/net/intel/idpf/idpf_rxtx.c | 2 +-
drivers/net/intel/idpf/idpf_rxtx_vec_common.h | 2 +-
25 files changed, 113 insertions(+), 105 deletions(-)
diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index b259d98904..722f87a70c 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -10,6 +10,14 @@
#include <rte_ethdev.h>
#include <rte_vect.h>
+/*
+ * Structure of a 16-byte Tx descriptor common across i40e, ice, iavf and idpf drivers
+ */
+struct ci_tx_desc {
+ uint64_t buffer_addr; /* Address of descriptor's data buf */
+ uint64_t cmd_type_offset_bsz;
+};
+
/* forward declaration of the common intel (ci) queue structure */
struct ci_tx_queue;
@@ -33,10 +41,10 @@ typedef void (*ice_tx_release_mbufs_t)(struct ci_tx_queue *txq);
struct ci_tx_queue {
union { /* TX ring virtual address */
- volatile struct i40e_tx_desc *i40e_tx_ring;
- volatile struct iavf_tx_desc *iavf_tx_ring;
- volatile struct ice_tx_desc *ice_tx_ring;
- volatile struct idpf_base_tx_desc *idpf_tx_ring;
+ volatile struct ci_tx_desc *i40e_tx_ring;
+ volatile struct ci_tx_desc *iavf_tx_ring;
+ volatile struct ci_tx_desc *ice_tx_ring;
+ volatile struct ci_tx_desc *idpf_tx_ring;
volatile union ixgbe_adv_tx_desc *ixgbe_tx_ring;
};
volatile uint8_t *qtx_tail; /* register address of tail */
diff --git a/drivers/net/intel/cpfl/cpfl_rxtx.c b/drivers/net/intel/cpfl/cpfl_rxtx.c
index 2e4cf3b875..57c6f6e736 100644
--- a/drivers/net/intel/cpfl/cpfl_rxtx.c
+++ b/drivers/net/intel/cpfl/cpfl_rxtx.c
@@ -131,7 +131,7 @@ cpfl_dma_zone_reserve(struct rte_eth_dev *dev, uint16_t queue_idx,
ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_sched_desc),
CPFL_DMA_MEM_ALIGN);
else
- ring_size = RTE_ALIGN(len * sizeof(struct idpf_base_tx_desc),
+ ring_size = RTE_ALIGN(len * sizeof(struct ci_tx_desc),
CPFL_DMA_MEM_ALIGN);
memcpy(ring_name, "cpfl Tx ring", sizeof("cpfl Tx ring"));
break;
diff --git a/drivers/net/intel/i40e/i40e_fdir.c b/drivers/net/intel/i40e/i40e_fdir.c
index 55d18c5d4a..605df73c9e 100644
--- a/drivers/net/intel/i40e/i40e_fdir.c
+++ b/drivers/net/intel/i40e/i40e_fdir.c
@@ -1377,7 +1377,7 @@ i40e_find_available_buffer(struct rte_eth_dev *dev)
*/
if (fdir_info->txq_available_buf_count <= 0) {
uint16_t tmp_tail;
- volatile struct i40e_tx_desc *tmp_txdp;
+ volatile struct ci_tx_desc *tmp_txdp;
tmp_tail = txq->tx_tail;
tmp_txdp = &txq->i40e_tx_ring[tmp_tail + 1];
@@ -1628,7 +1628,7 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
struct ci_tx_queue *txq = pf->fdir.txq;
struct ci_rx_queue *rxq = pf->fdir.rxq;
const struct i40e_fdir_action *fdir_action = &filter->action;
- volatile struct i40e_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
volatile struct i40e_filter_program_desc *fdirdp;
uint32_t td_cmd;
uint16_t vsi_id;
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index 2db58c6b24..6307e9809f 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -384,7 +384,7 @@ static inline int
i40e_xmit_cleanup(struct ci_tx_queue *txq)
{
struct ci_tx_entry *sw_ring = txq->sw_ring;
- volatile struct i40e_tx_desc *txd = txq->i40e_tx_ring;
+ volatile struct ci_tx_desc *txd = txq->i40e_tx_ring;
uint16_t last_desc_cleaned = txq->last_desc_cleaned;
uint16_t nb_tx_desc = txq->nb_tx_desc;
uint16_t desc_to_clean_to;
@@ -1088,8 +1088,8 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
struct ci_tx_queue *txq;
struct ci_tx_entry *sw_ring;
struct ci_tx_entry *txe, *txn;
- volatile struct i40e_tx_desc *txd;
- volatile struct i40e_tx_desc *txr;
+ volatile struct ci_tx_desc *txd;
+ volatile struct ci_tx_desc *txr;
struct rte_mbuf *tx_pkt;
struct rte_mbuf *m_seg;
uint32_t cd_tunneling_params;
@@ -1389,7 +1389,7 @@ i40e_tx_free_bufs(struct ci_tx_queue *txq)
/* Populate 4 descriptors with data from 4 mbufs */
static inline void
-tx4(volatile struct i40e_tx_desc *txdp, struct rte_mbuf **pkts)
+tx4(volatile struct ci_tx_desc *txdp, struct rte_mbuf **pkts)
{
uint64_t dma_addr;
uint32_t i;
@@ -1405,7 +1405,7 @@ tx4(volatile struct i40e_tx_desc *txdp, struct rte_mbuf **pkts)
/* Populate 1 descriptor with data from 1 mbuf */
static inline void
-tx1(volatile struct i40e_tx_desc *txdp, struct rte_mbuf **pkts)
+tx1(volatile struct ci_tx_desc *txdp, struct rte_mbuf **pkts)
{
uint64_t dma_addr;
@@ -1422,7 +1422,7 @@ i40e_tx_fill_hw_ring(struct ci_tx_queue *txq,
struct rte_mbuf **pkts,
uint16_t nb_pkts)
{
- volatile struct i40e_tx_desc *txdp = &txq->i40e_tx_ring[txq->tx_tail];
+ volatile struct ci_tx_desc *txdp = &txq->i40e_tx_ring[txq->tx_tail];
struct ci_tx_entry *txep = &txq->sw_ring[txq->tx_tail];
const int N_PER_LOOP = 4;
const int N_PER_LOOP_MASK = N_PER_LOOP - 1;
@@ -1450,7 +1450,7 @@ tx_xmit_pkts(struct ci_tx_queue *txq,
struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
{
- volatile struct i40e_tx_desc *txr = txq->i40e_tx_ring;
+ volatile struct ci_tx_desc *txr = txq->i40e_tx_ring;
uint16_t n = 0;
/**
@@ -2604,7 +2604,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
}
/* Allocate TX hardware ring descriptors. */
- ring_size = sizeof(struct i40e_tx_desc) * I40E_MAX_RING_DESC;
+ ring_size = sizeof(struct ci_tx_desc) * I40E_MAX_RING_DESC;
ring_size = RTE_ALIGN(ring_size, I40E_DMA_MEM_ALIGN);
tz = rte_eth_dma_zone_reserve(dev, "i40e_tx_ring", queue_idx,
ring_size, I40E_RING_BASE_ALIGN, socket_id);
@@ -2626,7 +2626,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
txq->tx_deferred_start = tx_conf->tx_deferred_start;
txq->tx_ring_dma = tz->iova;
- txq->i40e_tx_ring = (struct i40e_tx_desc *)tz->addr;
+ txq->i40e_tx_ring = (struct ci_tx_desc *)tz->addr;
/* Allocate software ring */
txq->sw_ring =
@@ -2899,13 +2899,13 @@ i40e_reset_tx_queue(struct ci_tx_queue *txq)
}
txe = txq->sw_ring;
- size = sizeof(struct i40e_tx_desc) * txq->nb_tx_desc;
+ size = sizeof(struct ci_tx_desc) * txq->nb_tx_desc;
for (i = 0; i < size; i++)
((volatile char *)txq->i40e_tx_ring)[i] = 0;
prev = (uint16_t)(txq->nb_tx_desc - 1);
for (i = 0; i < txq->nb_tx_desc; i++) {
- volatile struct i40e_tx_desc *txd = &txq->i40e_tx_ring[i];
+ volatile struct ci_tx_desc *txd = &txq->i40e_tx_ring[i];
txd->cmd_type_offset_bsz =
rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE);
@@ -3207,7 +3207,7 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
}
/* Allocate TX hardware ring descriptors. */
- ring_size = sizeof(struct i40e_tx_desc) * I40E_FDIR_NUM_TX_DESC;
+ ring_size = sizeof(struct ci_tx_desc) * I40E_FDIR_NUM_TX_DESC;
ring_size = RTE_ALIGN(ring_size, I40E_DMA_MEM_ALIGN);
tz = rte_eth_dma_zone_reserve(dev, "fdir_tx_ring",
@@ -3226,7 +3226,7 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
txq->i40e_vsi = pf->fdir.fdir_vsi;
txq->tx_ring_dma = tz->iova;
- txq->i40e_tx_ring = (struct i40e_tx_desc *)tz->addr;
+ txq->i40e_tx_ring = (struct ci_tx_desc *)tz->addr;
/*
* don't need to allocate software ring and reset for the fdir
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
index bbb6d907cf..ef5b252898 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
@@ -446,7 +446,7 @@ i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
}
static inline void
-vtx1(volatile struct i40e_tx_desc *txdp,
+vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
{
uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
@@ -459,7 +459,7 @@ vtx1(volatile struct i40e_tx_desc *txdp,
}
static inline void
-vtx(volatile struct i40e_tx_desc *txdp,
+vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
int i;
@@ -473,7 +473,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
{
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
- volatile struct i40e_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index aeb2756e7a..b3ce08c039 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -681,7 +681,7 @@ i40e_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
static inline void
-vtx1(volatile struct i40e_tx_desc *txdp,
+vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
{
uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
@@ -694,7 +694,7 @@ vtx1(volatile struct i40e_tx_desc *txdp,
}
static inline void
-vtx(volatile struct i40e_tx_desc *txdp,
+vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
const uint64_t hi_qw_tmpl = (I40E_TX_DESC_DTYPE_DATA |
@@ -739,7 +739,7 @@ i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
{
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
- volatile struct i40e_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index 571987d27a..6971488750 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -750,7 +750,7 @@ i40e_recv_scattered_pkts_vec_avx512(void *rx_queue,
}
static inline void
-vtx1(volatile struct i40e_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
+vtx1(volatile struct ci_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
{
uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) |
@@ -762,7 +762,7 @@ vtx1(volatile struct i40e_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
}
static inline void
-vtx(volatile struct i40e_tx_desc *txdp,
+vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
const uint64_t hi_qw_tmpl = (I40E_TX_DESC_DTYPE_DATA |
@@ -807,7 +807,7 @@ i40e_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
{
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
- volatile struct i40e_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index b5be0c1b59..6404b70c56 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -597,7 +597,7 @@ i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
}
static inline void
-vtx1(volatile struct i40e_tx_desc *txdp,
+vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
{
uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
@@ -609,7 +609,7 @@ vtx1(volatile struct i40e_tx_desc *txdp,
}
static inline void
-vtx(volatile struct i40e_tx_desc *txdp, struct rte_mbuf **pkt,
+vtx(volatile struct ci_tx_desc *txdp, struct rte_mbuf **pkt,
uint16_t nb_pkts, uint64_t flags)
{
int i;
@@ -623,7 +623,7 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
struct rte_mbuf **__rte_restrict tx_pkts, uint16_t nb_pkts)
{
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
- volatile struct i40e_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index c209135890..2a360c18ad 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -604,7 +604,7 @@ i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
}
static inline void
-vtx1(volatile struct i40e_tx_desc *txdp,
+vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
{
uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
@@ -617,7 +617,7 @@ vtx1(volatile struct i40e_tx_desc *txdp,
}
static inline void
-vtx(volatile struct i40e_tx_desc *txdp,
+vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
int i;
@@ -631,7 +631,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
{
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
- volatile struct i40e_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index ee53e6e802..c5e469a1ae 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -277,7 +277,7 @@ reset_tx_queue(struct ci_tx_queue *txq)
}
txe = txq->sw_ring;
- size = sizeof(struct iavf_tx_desc) * txq->nb_tx_desc;
+ size = sizeof(struct ci_tx_desc) * txq->nb_tx_desc;
for (i = 0; i < size; i++)
((volatile char *)txq->iavf_tx_ring)[i] = 0;
@@ -828,7 +828,7 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
}
/* Allocate TX hardware ring descriptors. */
- ring_size = sizeof(struct iavf_tx_desc) * IAVF_MAX_RING_DESC;
+ ring_size = sizeof(struct ci_tx_desc) * IAVF_MAX_RING_DESC;
ring_size = RTE_ALIGN(ring_size, IAVF_DMA_MEM_ALIGN);
mz = rte_eth_dma_zone_reserve(dev, "iavf_tx_ring", queue_idx,
ring_size, IAVF_RING_BASE_ALIGN,
@@ -840,7 +840,7 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
return -ENOMEM;
}
txq->tx_ring_dma = mz->iova;
- txq->iavf_tx_ring = (struct iavf_tx_desc *)mz->addr;
+ txq->iavf_tx_ring = (struct ci_tx_desc *)mz->addr;
txq->mz = mz;
reset_tx_queue(txq);
@@ -2334,7 +2334,7 @@ iavf_xmit_cleanup(struct ci_tx_queue *txq)
uint16_t desc_to_clean_to;
uint16_t nb_tx_to_clean;
- volatile struct iavf_tx_desc *txd = txq->iavf_tx_ring;
+ volatile struct ci_tx_desc *txd = txq->iavf_tx_ring;
desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
if (desc_to_clean_to >= nb_tx_desc)
@@ -2724,7 +2724,7 @@ iavf_calc_pkt_desc(struct rte_mbuf *tx_pkt)
}
static inline void
-iavf_fill_data_desc(volatile struct iavf_tx_desc *desc,
+iavf_fill_data_desc(volatile struct ci_tx_desc *desc,
uint64_t desc_template, uint16_t buffsz,
uint64_t buffer_addr)
{
@@ -2757,7 +2757,7 @@ uint16_t
iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
struct ci_tx_queue *txq = tx_queue;
- volatile struct iavf_tx_desc *txr = txq->iavf_tx_ring;
+ volatile struct ci_tx_desc *txr = txq->iavf_tx_ring;
struct ci_tx_entry *txe_ring = txq->sw_ring;
struct ci_tx_entry *txe, *txn;
struct rte_mbuf *mb, *mb_seg;
@@ -2775,7 +2775,7 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
txe = &txe_ring[desc_idx];
for (idx = 0; idx < nb_pkts; idx++) {
- volatile struct iavf_tx_desc *ddesc;
+ volatile struct ci_tx_desc *ddesc;
struct iavf_ipsec_crypto_pkt_metadata *ipsec_md;
uint16_t nb_desc_ctx, nb_desc_ipsec;
@@ -2896,7 +2896,7 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
mb_seg = mb;
do {
- ddesc = (volatile struct iavf_tx_desc *)
+ ddesc = (volatile struct ci_tx_desc *)
&txr[desc_idx];
txn = &txe_ring[txe->next_id];
diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index bff456e509..14580c5b8b 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -681,7 +681,7 @@ void iavf_dump_tx_descriptor(const struct ci_tx_queue *txq,
const volatile void *desc, uint16_t tx_id)
{
const char *name;
- const volatile struct iavf_tx_desc *tx_desc = desc;
+ const volatile struct ci_tx_desc *tx_desc = desc;
enum iavf_tx_desc_dtype_value type;
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
index e417257086..c3d7083230 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
@@ -1630,7 +1630,7 @@ iavf_recv_scattered_pkts_vec_avx2_flex_rxd_offload(void *rx_queue,
static __rte_always_inline void
-iavf_vtx1(volatile struct iavf_tx_desc *txdp,
+iavf_vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags, bool offload, uint8_t vlan_flag)
{
uint64_t high_qw =
@@ -1646,7 +1646,7 @@ iavf_vtx1(volatile struct iavf_tx_desc *txdp,
}
static __rte_always_inline void
-iavf_vtx(volatile struct iavf_tx_desc *txdp,
+iavf_vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags, bool offload, uint8_t vlan_flag)
{
const uint64_t hi_qw_tmpl = (IAVF_TX_DESC_DTYPE_DATA |
@@ -1713,7 +1713,7 @@ iavf_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts, bool offload)
{
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
- volatile struct iavf_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
/* bit2 is reserved and must be set to 1 according to Spec */
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
index 7c0907b7cf..d79d96c7b7 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
@@ -1840,7 +1840,7 @@ tx_backlog_entry_avx512(struct ci_tx_entry_vec *txep,
}
static __rte_always_inline void
-iavf_vtx1(volatile struct iavf_tx_desc *txdp,
+iavf_vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags,
bool offload, uint8_t vlan_flag)
{
@@ -1859,7 +1859,7 @@ iavf_vtx1(volatile struct iavf_tx_desc *txdp,
#define IAVF_TX_LEN_MASK 0xAA
#define IAVF_TX_OFF_MASK 0x55
static __rte_always_inline void
-iavf_vtx(volatile struct iavf_tx_desc *txdp,
+iavf_vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags,
bool offload, uint8_t vlan_flag)
{
@@ -2068,7 +2068,7 @@ iavf_fill_ctx_desc_tunnelling_field(volatile uint64_t *qw0,
}
static __rte_always_inline void
-ctx_vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt,
+ctx_vtx1(volatile struct ci_tx_desc *txdp, struct rte_mbuf *pkt,
uint64_t flags, bool offload, uint8_t vlan_flag)
{
uint64_t high_ctx_qw = IAVF_TX_DESC_DTYPE_CONTEXT;
@@ -2106,7 +2106,7 @@ ctx_vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt,
}
static __rte_always_inline void
-ctx_vtx(volatile struct iavf_tx_desc *txdp,
+ctx_vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags,
bool offload, uint8_t vlan_flag)
{
@@ -2203,7 +2203,7 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts, bool offload)
{
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
- volatile struct iavf_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
/* bit2 is reserved and must be set to 1 according to Spec */
@@ -2271,7 +2271,7 @@ iavf_xmit_fixed_burst_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts, bool offload)
{
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
- volatile struct iavf_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, nb_mbuf, tx_id;
/* bit2 is reserved and must be set to 1 according to Spec */
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
index 9dae0a79bf..cb086cd352 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
@@ -1242,7 +1242,7 @@ iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
}
static inline void
-vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
+vtx1(volatile struct ci_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
{
uint64_t high_qw =
(IAVF_TX_DESC_DTYPE_DATA |
@@ -1256,7 +1256,7 @@ vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
}
static inline void
-iavf_vtx(volatile struct iavf_tx_desc *txdp, struct rte_mbuf **pkt,
+iavf_vtx(volatile struct ci_tx_desc *txdp, struct rte_mbuf **pkt,
uint16_t nb_pkts, uint64_t flags)
{
int i;
@@ -1270,7 +1270,7 @@ iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
{
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
- volatile struct iavf_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = IAVF_TX_DESC_CMD_EOP | 0x04; /* bit 2 must be set */
diff --git a/drivers/net/intel/ice/ice_dcf_ethdev.c b/drivers/net/intel/ice/ice_dcf_ethdev.c
index 81da5a4656..ab1d499cef 100644
--- a/drivers/net/intel/ice/ice_dcf_ethdev.c
+++ b/drivers/net/intel/ice/ice_dcf_ethdev.c
@@ -399,7 +399,7 @@ reset_tx_queue(struct ci_tx_queue *txq)
}
txe = txq->sw_ring;
- size = sizeof(struct ice_tx_desc) * txq->nb_tx_desc;
+ size = sizeof(struct ci_tx_desc) * txq->nb_tx_desc;
for (i = 0; i < size; i++)
((volatile char *)txq->ice_tx_ring)[i] = 0;
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index f5d484c1e6..7358a95ce1 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -1111,13 +1111,13 @@ ice_reset_tx_queue(struct ci_tx_queue *txq)
}
txe = txq->sw_ring;
- size = sizeof(struct ice_tx_desc) * txq->nb_tx_desc;
+ size = sizeof(struct ci_tx_desc) * txq->nb_tx_desc;
for (i = 0; i < size; i++)
((volatile char *)txq->ice_tx_ring)[i] = 0;
prev = (uint16_t)(txq->nb_tx_desc - 1);
for (i = 0; i < txq->nb_tx_desc; i++) {
- volatile struct ice_tx_desc *txd = &txq->ice_tx_ring[i];
+ volatile struct ci_tx_desc *txd = &txq->ice_tx_ring[i];
txd->cmd_type_offset_bsz =
rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DESC_DONE);
@@ -1609,7 +1609,7 @@ ice_tx_queue_setup(struct rte_eth_dev *dev,
}
/* Allocate TX hardware ring descriptors. */
- ring_size = sizeof(struct ice_tx_desc) * ICE_MAX_NUM_DESC_BY_MAC(hw);
+ ring_size = sizeof(struct ci_tx_desc) * ICE_MAX_NUM_DESC_BY_MAC(hw);
ring_size = RTE_ALIGN(ring_size, ICE_DMA_MEM_ALIGN);
tz = rte_eth_dma_zone_reserve(dev, "ice_tx_ring", queue_idx,
ring_size, ICE_RING_BASE_ALIGN,
@@ -2611,7 +2611,7 @@ ice_fdir_setup_tx_resources(struct ice_pf *pf)
}
/* Allocate TX hardware ring descriptors. */
- ring_size = sizeof(struct ice_tx_desc) * ICE_FDIR_NUM_TX_DESC;
+ ring_size = sizeof(struct ci_tx_desc) * ICE_FDIR_NUM_TX_DESC;
ring_size = RTE_ALIGN(ring_size, ICE_DMA_MEM_ALIGN);
tz = rte_eth_dma_zone_reserve(dev, "fdir_tx_ring",
@@ -2630,7 +2630,7 @@ ice_fdir_setup_tx_resources(struct ice_pf *pf)
txq->ice_vsi = pf->fdir.fdir_vsi;
txq->tx_ring_dma = tz->iova;
- txq->ice_tx_ring = (struct ice_tx_desc *)tz->addr;
+ txq->ice_tx_ring = (struct ci_tx_desc *)tz->addr;
/*
* don't need to allocate software ring and reset for the fdir
* program queue just set the queue has been configured.
@@ -3019,7 +3019,7 @@ static inline int
ice_xmit_cleanup(struct ci_tx_queue *txq)
{
struct ci_tx_entry *sw_ring = txq->sw_ring;
- volatile struct ice_tx_desc *txd = txq->ice_tx_ring;
+ volatile struct ci_tx_desc *txd = txq->ice_tx_ring;
uint16_t last_desc_cleaned = txq->last_desc_cleaned;
uint16_t nb_tx_desc = txq->nb_tx_desc;
uint16_t desc_to_clean_to;
@@ -3140,8 +3140,8 @@ uint16_t
ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
struct ci_tx_queue *txq;
- volatile struct ice_tx_desc *ice_tx_ring;
- volatile struct ice_tx_desc *txd;
+ volatile struct ci_tx_desc *ice_tx_ring;
+ volatile struct ci_tx_desc *txd;
struct ci_tx_entry *sw_ring;
struct ci_tx_entry *txe, *txn;
struct rte_mbuf *tx_pkt;
@@ -3304,7 +3304,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
while ((ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) &&
unlikely(slen > ICE_MAX_DATA_PER_TXD)) {
- txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
+ txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
txd->cmd_type_offset_bsz =
rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DATA |
((uint64_t)td_cmd << ICE_TXD_QW1_CMD_S) |
@@ -3323,7 +3323,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
txn = &sw_ring[txe->next_id];
}
- txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
+ txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
txd->cmd_type_offset_bsz =
rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DATA |
((uint64_t)td_cmd << ICE_TXD_QW1_CMD_S) |
@@ -3544,14 +3544,14 @@ ice_tx_done_cleanup(void *txq, uint32_t free_cnt)
/* Populate 4 descriptors with data from 4 mbufs */
static inline void
-tx4(volatile struct ice_tx_desc *txdp, struct rte_mbuf **pkts)
+tx4(volatile struct ci_tx_desc *txdp, struct rte_mbuf **pkts)
{
uint64_t dma_addr;
uint32_t i;
for (i = 0; i < 4; i++, txdp++, pkts++) {
dma_addr = rte_mbuf_data_iova(*pkts);
- txdp->buf_addr = rte_cpu_to_le_64(dma_addr);
+ txdp->buffer_addr = rte_cpu_to_le_64(dma_addr);
txdp->cmd_type_offset_bsz =
ice_build_ctob((uint32_t)ICE_TD_CMD, 0,
(*pkts)->data_len, 0);
@@ -3560,12 +3560,12 @@ tx4(volatile struct ice_tx_desc *txdp, struct rte_mbuf **pkts)
/* Populate 1 descriptor with data from 1 mbuf */
static inline void
-tx1(volatile struct ice_tx_desc *txdp, struct rte_mbuf **pkts)
+tx1(volatile struct ci_tx_desc *txdp, struct rte_mbuf **pkts)
{
uint64_t dma_addr;
dma_addr = rte_mbuf_data_iova(*pkts);
- txdp->buf_addr = rte_cpu_to_le_64(dma_addr);
+ txdp->buffer_addr = rte_cpu_to_le_64(dma_addr);
txdp->cmd_type_offset_bsz =
ice_build_ctob((uint32_t)ICE_TD_CMD, 0,
(*pkts)->data_len, 0);
@@ -3575,7 +3575,7 @@ static inline void
ice_tx_fill_hw_ring(struct ci_tx_queue *txq, struct rte_mbuf **pkts,
uint16_t nb_pkts)
{
- volatile struct ice_tx_desc *txdp = &txq->ice_tx_ring[txq->tx_tail];
+ volatile struct ci_tx_desc *txdp = &txq->ice_tx_ring[txq->tx_tail];
struct ci_tx_entry *txep = &txq->sw_ring[txq->tx_tail];
const int N_PER_LOOP = 4;
const int N_PER_LOOP_MASK = N_PER_LOOP - 1;
@@ -3608,7 +3608,7 @@ tx_xmit_pkts(struct ci_tx_queue *txq,
struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
{
- volatile struct ice_tx_desc *txr = txq->ice_tx_ring;
+ volatile struct ci_tx_desc *txr = txq->ice_tx_ring;
uint16_t n = 0;
/**
@@ -4891,7 +4891,7 @@ ice_fdir_programming(struct ice_pf *pf, struct ice_fltr_desc *fdir_desc)
struct ci_tx_queue *txq = pf->fdir.txq;
struct ci_rx_queue *rxq = pf->fdir.rxq;
volatile struct ice_fltr_desc *fdirdp;
- volatile struct ice_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
uint32_t td_cmd;
uint16_t i;
@@ -4901,7 +4901,7 @@ ice_fdir_programming(struct ice_pf *pf, struct ice_fltr_desc *fdir_desc)
fdirdp->dtype_cmd_vsi_fdid = fdir_desc->dtype_cmd_vsi_fdid;
txdp = &txq->ice_tx_ring[txq->tx_tail + 1];
- txdp->buf_addr = rte_cpu_to_le_64(pf->fdir.dma_addr);
+ txdp->buffer_addr = rte_cpu_to_le_64(pf->fdir.dma_addr);
td_cmd = ICE_TX_DESC_CMD_EOP |
ICE_TX_DESC_CMD_RS |
ICE_TX_DESC_CMD_DUMMY;
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
index b952b8dddc..95c4f4569c 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
@@ -774,7 +774,7 @@ ice_recv_scattered_pkts_vec_avx2_offload(void *rx_queue,
}
static __rte_always_inline void
-ice_vtx1(volatile struct ice_tx_desc *txdp,
+ice_vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags, bool offload)
{
uint64_t high_qw =
@@ -789,7 +789,7 @@ ice_vtx1(volatile struct ice_tx_desc *txdp,
}
static __rte_always_inline void
-ice_vtx(volatile struct ice_tx_desc *txdp,
+ice_vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags, bool offload)
{
const uint64_t hi_qw_tmpl = (ICE_TX_DESC_DTYPE_DATA |
@@ -852,7 +852,7 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts, bool offload)
{
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
- volatile struct ice_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = ICE_TD_CMD;
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
index 7c6fe82072..1f6bf5fc8e 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
@@ -847,7 +847,7 @@ ice_recv_scattered_pkts_vec_avx512_offload(void *rx_queue,
}
static __rte_always_inline void
-ice_vtx1(volatile struct ice_tx_desc *txdp,
+ice_vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags, bool do_offload)
{
uint64_t high_qw =
@@ -863,7 +863,7 @@ ice_vtx1(volatile struct ice_tx_desc *txdp,
}
static __rte_always_inline void
-ice_vtx(volatile struct ice_tx_desc *txdp, struct rte_mbuf **pkt,
+ice_vtx(volatile struct ci_tx_desc *txdp, struct rte_mbuf **pkt,
uint16_t nb_pkts, uint64_t flags, bool do_offload)
{
const uint64_t hi_qw_tmpl = (ICE_TX_DESC_DTYPE_DATA |
@@ -916,7 +916,7 @@ ice_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts, bool do_offload)
{
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
- volatile struct ice_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = ICE_TD_CMD;
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_sse.c b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
index 4fc1b7e881..44f3fc0fa5 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
@@ -596,7 +596,7 @@ ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
}
static inline void
-ice_vtx1(volatile struct ice_tx_desc *txdp, struct rte_mbuf *pkt,
+ice_vtx1(volatile struct ci_tx_desc *txdp, struct rte_mbuf *pkt,
uint64_t flags)
{
uint64_t high_qw =
@@ -609,7 +609,7 @@ ice_vtx1(volatile struct ice_tx_desc *txdp, struct rte_mbuf *pkt,
}
static inline void
-ice_vtx(volatile struct ice_tx_desc *txdp, struct rte_mbuf **pkt,
+ice_vtx(volatile struct ci_tx_desc *txdp, struct rte_mbuf **pkt,
uint16_t nb_pkts, uint64_t flags)
{
int i;
@@ -623,7 +623,7 @@ ice_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
{
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
- volatile struct ice_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = ICE_TD_CMD;
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.c b/drivers/net/intel/idpf/idpf_common_rxtx.c
index 797ee515dd..be3c1ef216 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.c
@@ -264,13 +264,13 @@ idpf_qc_single_tx_queue_reset(struct ci_tx_queue *txq)
}
txe = txq->sw_ring;
- size = sizeof(struct idpf_base_tx_desc) * txq->nb_tx_desc;
+ size = sizeof(struct ci_tx_desc) * txq->nb_tx_desc;
for (i = 0; i < size; i++)
((volatile char *)txq->idpf_tx_ring)[i] = 0;
prev = (uint16_t)(txq->nb_tx_desc - 1);
for (i = 0; i < txq->nb_tx_desc; i++) {
- txq->idpf_tx_ring[i].qw1 =
+ txq->idpf_tx_ring[i].cmd_type_offset_bsz =
rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
txe[i].mbuf = NULL;
txe[i].last_id = i;
@@ -1335,14 +1335,14 @@ idpf_xmit_cleanup(struct ci_tx_queue *txq)
uint16_t desc_to_clean_to;
uint16_t nb_tx_to_clean;
- volatile struct idpf_base_tx_desc *txd = txq->idpf_tx_ring;
+ volatile struct ci_tx_desc *txd = txq->idpf_tx_ring;
desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
if (desc_to_clean_to >= nb_tx_desc)
desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
- if ((txd[desc_to_clean_to].qw1 &
+ if ((txd[desc_to_clean_to].cmd_type_offset_bsz &
rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
TX_LOG(DEBUG, "TX descriptor %4u is not done "
@@ -1358,7 +1358,7 @@ idpf_xmit_cleanup(struct ci_tx_queue *txq)
nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
last_desc_cleaned);
- txd[desc_to_clean_to].qw1 = 0;
+ txd[desc_to_clean_to].cmd_type_offset_bsz = 0;
txq->last_desc_cleaned = desc_to_clean_to;
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
@@ -1372,8 +1372,8 @@ uint16_t
idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
{
- volatile struct idpf_base_tx_desc *txd;
- volatile struct idpf_base_tx_desc *txr;
+ volatile struct ci_tx_desc *txd;
+ volatile struct ci_tx_desc *txr;
union idpf_tx_offload tx_offload = {0};
struct ci_tx_entry *txe, *txn;
struct ci_tx_entry *sw_ring;
@@ -1491,8 +1491,8 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
/* Setup TX Descriptor */
slen = m_seg->data_len;
buf_dma_addr = rte_mbuf_data_iova(m_seg);
- txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
- txd->qw1 = rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DATA |
+ txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
+ txd->cmd_type_offset_bsz = rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DATA |
((uint64_t)td_cmd << IDPF_TXD_QW1_CMD_S) |
((uint64_t)td_offset << IDPF_TXD_QW1_OFFSET_S) |
((uint64_t)slen << IDPF_TXD_QW1_TX_BUF_SZ_S));
@@ -1519,7 +1519,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->nb_tx_used = 0;
}
- txd->qw1 |= rte_cpu_to_le_16(td_cmd << IDPF_TXD_QW1_CMD_S);
+ txd->cmd_type_offset_bsz |= rte_cpu_to_le_16(td_cmd << IDPF_TXD_QW1_CMD_S);
}
end_of_tx:
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.h b/drivers/net/intel/idpf/idpf_common_rxtx.h
index 7c6ff5d047..2f2fa153b2 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.h
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.h
@@ -182,7 +182,7 @@ union idpf_tx_offload {
};
union idpf_tx_desc {
- struct idpf_base_tx_desc *tx_ring;
+ struct ci_tx_desc *tx_ring;
struct idpf_flex_tx_sched_desc *desc_ring;
struct idpf_splitq_tx_compl_desc *compl_ring;
};
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c b/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c
index 21c8f79254..5f5d538dcb 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c
@@ -483,7 +483,7 @@ idpf_dp_singleq_recv_pkts_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, uint16
}
static inline void
-idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,
+idpf_singleq_vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
{
uint64_t high_qw =
@@ -497,7 +497,7 @@ idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,
}
static inline void
-idpf_singleq_vtx(volatile struct idpf_base_tx_desc *txdp,
+idpf_singleq_vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA |
@@ -556,7 +556,7 @@ idpf_singleq_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts
uint16_t nb_pkts)
{
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
- volatile struct idpf_base_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = IDPF_TX_DESC_CMD_EOP;
@@ -604,7 +604,7 @@ idpf_singleq_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->idpf_tx_ring[txq->tx_next_rs].qw1 |=
+ txq->idpf_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
IDPF_TXD_QW1_CMD_S);
txq->tx_next_rs =
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c b/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c
index bc2cadd738..c1ec3d1222 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c
@@ -1000,7 +1000,7 @@ idpf_dp_splitq_recv_pkts_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
}
static __rte_always_inline void
-idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,
+idpf_singleq_vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
{
uint64_t high_qw =
@@ -1016,7 +1016,7 @@ idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,
#define IDPF_TX_LEN_MASK 0xAA
#define IDPF_TX_OFF_MASK 0x55
static __rte_always_inline void
-idpf_singleq_vtx(volatile struct idpf_base_tx_desc *txdp,
+idpf_singleq_vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA |
@@ -1072,7 +1072,7 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
uint16_t nb_pkts)
{
struct ci_tx_queue *txq = tx_queue;
- volatile struct idpf_base_tx_desc *txdp;
+ volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = IDPF_TX_DESC_CMD_EOP;
@@ -1123,7 +1123,7 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->idpf_tx_ring[txq->tx_next_rs].qw1 |=
+ txq->idpf_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
IDPF_TXD_QW1_CMD_S);
txq->tx_next_rs =
diff --git a/drivers/net/intel/idpf/idpf_rxtx.c b/drivers/net/intel/idpf/idpf_rxtx.c
index 47f8347b41..9b63e44341 100644
--- a/drivers/net/intel/idpf/idpf_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_rxtx.c
@@ -72,7 +72,7 @@ idpf_dma_zone_reserve(struct rte_eth_dev *dev, uint16_t queue_idx,
ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_sched_desc),
IDPF_DMA_MEM_ALIGN);
else
- ring_size = RTE_ALIGN(len * sizeof(struct idpf_base_tx_desc),
+ ring_size = RTE_ALIGN(len * sizeof(struct ci_tx_desc),
IDPF_DMA_MEM_ALIGN);
rte_memcpy(ring_name, "idpf Tx ring", sizeof("idpf Tx ring"));
break;
diff --git a/drivers/net/intel/idpf/idpf_rxtx_vec_common.h b/drivers/net/intel/idpf/idpf_rxtx_vec_common.h
index 425f0792a1..4702061484 100644
--- a/drivers/net/intel/idpf/idpf_rxtx_vec_common.h
+++ b/drivers/net/intel/idpf/idpf_rxtx_vec_common.h
@@ -31,7 +31,7 @@ idpf_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
if (txq->complq != NULL)
return 1;
- return (txq->idpf_tx_ring[idx].qw1 &
+ return (txq->idpf_tx_ring[idx].cmd_type_offset_bsz &
rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) ==
rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
}
--
2.51.0
^ permalink raw reply	[flat|nested] 30+ messages in thread
* [RFC PATCH 02/27] net/intel: use common tx ring structure
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 01/27] net/intel: create common Tx descriptor structure Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 03/27] net/intel: create common post-Tx cleanup function Bruce Richardson
` (24 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev
Cc: Bruce Richardson, Praveen Shetty, Vladimir Medvedkin,
Anatoly Burakov, Jingjing Wu
Since we now have a common descriptor type, the separate per-driver ring
pointers in the union can be merged into a single pointer, with only the
ixgbe pointer kept separate.
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
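For illustration only (not part of the patch): once every driver dereferences the same
ci_tx_ring pointer, the per-driver descriptor-done checks become structurally identical.
The sketch below mirrors the i40e_tx_desc_done() hunk further down; the I40E_* macros
are the i40e names, and iavf, ice and idpf have equivalent DTYPE definitions.

/* illustration only: check descriptor-done via the shared ring pointer */
static inline int
example_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
{
	return (txq->ci_tx_ring[idx].cmd_type_offset_bsz &
			rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
			rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE);
}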
drivers/net/intel/common/tx.h | 5 +--
drivers/net/intel/cpfl/cpfl_rxtx.c | 2 +-
drivers/net/intel/i40e/i40e_fdir.c | 6 ++--
drivers/net/intel/i40e/i40e_rxtx.c | 22 ++++++------
.../net/intel/i40e/i40e_rxtx_vec_altivec.c | 6 ++--
drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c | 6 ++--
drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c | 6 ++--
drivers/net/intel/i40e/i40e_rxtx_vec_common.h | 2 +-
drivers/net/intel/i40e/i40e_rxtx_vec_neon.c | 6 ++--
drivers/net/intel/i40e/i40e_rxtx_vec_sse.c | 6 ++--
drivers/net/intel/iavf/iavf_rxtx.c | 14 ++++----
drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c | 6 ++--
drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c | 12 +++----
drivers/net/intel/iavf/iavf_rxtx_vec_common.h | 2 +-
drivers/net/intel/iavf/iavf_rxtx_vec_sse.c | 6 ++--
drivers/net/intel/ice/ice_dcf_ethdev.c | 4 +--
drivers/net/intel/ice/ice_rxtx.c | 34 +++++++++----------
drivers/net/intel/ice/ice_rxtx_vec_avx2.c | 6 ++--
drivers/net/intel/ice/ice_rxtx_vec_avx512.c | 6 ++--
drivers/net/intel/ice/ice_rxtx_vec_common.h | 2 +-
drivers/net/intel/ice/ice_rxtx_vec_sse.c | 6 ++--
drivers/net/intel/idpf/idpf_common_rxtx.c | 8 ++---
.../net/intel/idpf/idpf_common_rxtx_avx2.c | 6 ++--
.../net/intel/idpf/idpf_common_rxtx_avx512.c | 6 ++--
drivers/net/intel/idpf/idpf_rxtx.c | 2 +-
drivers/net/intel/idpf/idpf_rxtx_vec_common.h | 2 +-
26 files changed, 93 insertions(+), 96 deletions(-)
diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index 722f87a70c..a9ff3bebd5 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -41,10 +41,7 @@ typedef void (*ice_tx_release_mbufs_t)(struct ci_tx_queue *txq);
struct ci_tx_queue {
union { /* TX ring virtual address */
- volatile struct ci_tx_desc *i40e_tx_ring;
- volatile struct ci_tx_desc *iavf_tx_ring;
- volatile struct ci_tx_desc *ice_tx_ring;
- volatile struct ci_tx_desc *idpf_tx_ring;
+ volatile struct ci_tx_desc *ci_tx_ring;
volatile union ixgbe_adv_tx_desc *ixgbe_tx_ring;
};
volatile uint8_t *qtx_tail; /* register address of tail */
diff --git a/drivers/net/intel/cpfl/cpfl_rxtx.c b/drivers/net/intel/cpfl/cpfl_rxtx.c
index 57c6f6e736..a3127e7c97 100644
--- a/drivers/net/intel/cpfl/cpfl_rxtx.c
+++ b/drivers/net/intel/cpfl/cpfl_rxtx.c
@@ -594,7 +594,7 @@ cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
}
if (!is_splitq) {
- txq->idpf_tx_ring = mz->addr;
+ txq->ci_tx_ring = mz->addr;
idpf_qc_single_tx_queue_reset(txq);
} else {
txq->desc_ring = mz->addr;
diff --git a/drivers/net/intel/i40e/i40e_fdir.c b/drivers/net/intel/i40e/i40e_fdir.c
index 605df73c9e..8a01aec0e2 100644
--- a/drivers/net/intel/i40e/i40e_fdir.c
+++ b/drivers/net/intel/i40e/i40e_fdir.c
@@ -1380,7 +1380,7 @@ i40e_find_available_buffer(struct rte_eth_dev *dev)
volatile struct ci_tx_desc *tmp_txdp;
tmp_tail = txq->tx_tail;
- tmp_txdp = &txq->i40e_tx_ring[tmp_tail + 1];
+ tmp_txdp = &txq->ci_tx_ring[tmp_tail + 1];
do {
if ((tmp_txdp->cmd_type_offset_bsz &
@@ -1637,7 +1637,7 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
PMD_DRV_LOG(INFO, "filling filter programming descriptor.");
fdirdp = (volatile struct i40e_filter_program_desc *)
- (&txq->i40e_tx_ring[txq->tx_tail]);
+ (&txq->ci_tx_ring[txq->tx_tail]);
fdirdp->qindex_flex_ptype_vsi =
rte_cpu_to_le_32((fdir_action->rx_queue <<
@@ -1707,7 +1707,7 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
fdirdp->fd_id = rte_cpu_to_le_32(filter->soft_id);
PMD_DRV_LOG(INFO, "filling transmit descriptor.");
- txdp = &txq->i40e_tx_ring[txq->tx_tail + 1];
+ txdp = &txq->ci_tx_ring[txq->tx_tail + 1];
txdp->buffer_addr = rte_cpu_to_le_64(pf->fdir.dma_addr[txq->tx_tail >> 1]);
td_cmd = I40E_TX_DESC_CMD_EOP |
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index 6307e9809f..2af3098f81 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -384,7 +384,7 @@ static inline int
i40e_xmit_cleanup(struct ci_tx_queue *txq)
{
struct ci_tx_entry *sw_ring = txq->sw_ring;
- volatile struct ci_tx_desc *txd = txq->i40e_tx_ring;
+ volatile struct ci_tx_desc *txd = txq->ci_tx_ring;
uint16_t last_desc_cleaned = txq->last_desc_cleaned;
uint16_t nb_tx_desc = txq->nb_tx_desc;
uint16_t desc_to_clean_to;
@@ -1108,7 +1108,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
txq = tx_queue;
sw_ring = txq->sw_ring;
- txr = txq->i40e_tx_ring;
+ txr = txq->ci_tx_ring;
tx_id = txq->tx_tail;
txe = &sw_ring[tx_id];
@@ -1343,7 +1343,7 @@ i40e_tx_free_bufs(struct ci_tx_queue *txq)
const uint16_t k = RTE_ALIGN_FLOOR(tx_rs_thresh, I40E_TX_MAX_FREE_BUF_SZ);
const uint16_t m = tx_rs_thresh % I40E_TX_MAX_FREE_BUF_SZ;
- if ((txq->i40e_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+ if ((txq->ci_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
return 0;
@@ -1422,7 +1422,7 @@ i40e_tx_fill_hw_ring(struct ci_tx_queue *txq,
struct rte_mbuf **pkts,
uint16_t nb_pkts)
{
- volatile struct ci_tx_desc *txdp = &txq->i40e_tx_ring[txq->tx_tail];
+ volatile struct ci_tx_desc *txdp = &txq->ci_tx_ring[txq->tx_tail];
struct ci_tx_entry *txep = &txq->sw_ring[txq->tx_tail];
const int N_PER_LOOP = 4;
const int N_PER_LOOP_MASK = N_PER_LOOP - 1;
@@ -1450,7 +1450,7 @@ tx_xmit_pkts(struct ci_tx_queue *txq,
struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
{
- volatile struct ci_tx_desc *txr = txq->i40e_tx_ring;
+ volatile struct ci_tx_desc *txr = txq->ci_tx_ring;
uint16_t n = 0;
/**
@@ -2409,7 +2409,7 @@ i40e_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
desc -= txq->nb_tx_desc;
}
- status = &txq->i40e_tx_ring[desc].cmd_type_offset_bsz;
+ status = &txq->ci_tx_ring[desc].cmd_type_offset_bsz;
mask = rte_le_to_cpu_64(I40E_TXD_QW1_DTYPE_MASK);
expect = rte_cpu_to_le_64(
I40E_TX_DESC_DTYPE_DESC_DONE << I40E_TXD_QW1_DTYPE_SHIFT);
@@ -2606,7 +2606,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
/* Allocate TX hardware ring descriptors. */
ring_size = sizeof(struct ci_tx_desc) * I40E_MAX_RING_DESC;
ring_size = RTE_ALIGN(ring_size, I40E_DMA_MEM_ALIGN);
- tz = rte_eth_dma_zone_reserve(dev, "i40e_tx_ring", queue_idx,
+ tz = rte_eth_dma_zone_reserve(dev, "ci_tx_ring", queue_idx,
ring_size, I40E_RING_BASE_ALIGN, socket_id);
if (!tz) {
i40e_tx_queue_release(txq);
@@ -2626,7 +2626,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
txq->tx_deferred_start = tx_conf->tx_deferred_start;
txq->tx_ring_dma = tz->iova;
- txq->i40e_tx_ring = (struct ci_tx_desc *)tz->addr;
+ txq->ci_tx_ring = (struct ci_tx_desc *)tz->addr;
/* Allocate software ring */
txq->sw_ring =
@@ -2901,11 +2901,11 @@ i40e_reset_tx_queue(struct ci_tx_queue *txq)
txe = txq->sw_ring;
size = sizeof(struct ci_tx_desc) * txq->nb_tx_desc;
for (i = 0; i < size; i++)
- ((volatile char *)txq->i40e_tx_ring)[i] = 0;
+ ((volatile char *)txq->ci_tx_ring)[i] = 0;
prev = (uint16_t)(txq->nb_tx_desc - 1);
for (i = 0; i < txq->nb_tx_desc; i++) {
- volatile struct ci_tx_desc *txd = &txq->i40e_tx_ring[i];
+ volatile struct ci_tx_desc *txd = &txq->ci_tx_ring[i];
txd->cmd_type_offset_bsz =
rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE);
@@ -3226,7 +3226,7 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
txq->i40e_vsi = pf->fdir.fdir_vsi;
txq->tx_ring_dma = tz->iova;
- txq->i40e_tx_ring = (struct ci_tx_desc *)tz->addr;
+ txq->ci_tx_ring = (struct ci_tx_desc *)tz->addr;
/*
* don't need to allocate software ring and reset for the fdir
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
index ef5b252898..81e9e2bc0b 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
@@ -489,7 +489,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
return 0;
tx_id = txq->tx_tail;
- txdp = &txq->i40e_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
@@ -509,7 +509,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
/* avoid reach the end of ring */
- txdp = &txq->i40e_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
}
@@ -519,7 +519,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->i40e_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+ txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
I40E_TXD_QW1_CMD_SHIFT);
txq->tx_next_rs =
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index b3ce08c039..b25b05d79d 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -753,7 +753,7 @@ i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
return 0;
tx_id = txq->tx_tail;
- txdp = &txq->i40e_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
@@ -774,7 +774,7 @@ i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
/* avoid reach the end of ring */
- txdp = &txq->i40e_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
}
@@ -784,7 +784,7 @@ i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->i40e_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+ txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
I40E_TXD_QW1_CMD_SHIFT);
txq->tx_next_rs =
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index 6971488750..9a967faeee 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -821,7 +821,7 @@ i40e_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
return 0;
tx_id = txq->tx_tail;
- txdp = &txq->i40e_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = (void *)txq->sw_ring;
txep += tx_id;
@@ -843,7 +843,7 @@ i40e_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
/* avoid reach the end of ring */
- txdp = txq->i40e_tx_ring;
+ txdp = txq->ci_tx_ring;
txep = (void *)txq->sw_ring;
}
@@ -853,7 +853,7 @@ i40e_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->i40e_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+ txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
I40E_TXD_QW1_CMD_SHIFT);
txq->tx_next_rs =
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_common.h b/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
index 14651f2f06..1fd7fc75bf 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
@@ -15,7 +15,7 @@
static inline int
i40e_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
{
- return (txq->i40e_tx_ring[idx].cmd_type_offset_bsz &
+ return (txq->ci_tx_ring[idx].cmd_type_offset_bsz &
rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE);
}
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index 6404b70c56..0b95152232 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -638,7 +638,7 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
return 0;
tx_id = txq->tx_tail;
- txdp = &txq->i40e_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
@@ -658,7 +658,7 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
/* avoid reach the end of ring */
- txdp = &txq->i40e_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
}
@@ -668,7 +668,7 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->i40e_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+ txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
I40E_TXD_QW1_CMD_SHIFT);
txq->tx_next_rs =
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 2a360c18ad..2a3baa415e 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -646,7 +646,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
return 0;
tx_id = txq->tx_tail;
- txdp = &txq->i40e_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
@@ -666,7 +666,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
/* avoid reach the end of ring */
- txdp = &txq->i40e_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
}
@@ -676,7 +676,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->i40e_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+ txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
I40E_TXD_QW1_CMD_SHIFT);
txq->tx_next_rs =
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index c5e469a1ae..2ed778a872 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -279,11 +279,11 @@ reset_tx_queue(struct ci_tx_queue *txq)
txe = txq->sw_ring;
size = sizeof(struct ci_tx_desc) * txq->nb_tx_desc;
for (i = 0; i < size; i++)
- ((volatile char *)txq->iavf_tx_ring)[i] = 0;
+ ((volatile char *)txq->ci_tx_ring)[i] = 0;
prev = (uint16_t)(txq->nb_tx_desc - 1);
for (i = 0; i < txq->nb_tx_desc; i++) {
- txq->iavf_tx_ring[i].cmd_type_offset_bsz =
+ txq->ci_tx_ring[i].cmd_type_offset_bsz =
rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE);
txe[i].mbuf = NULL;
txe[i].last_id = i;
@@ -830,7 +830,7 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
/* Allocate TX hardware ring descriptors. */
ring_size = sizeof(struct ci_tx_desc) * IAVF_MAX_RING_DESC;
ring_size = RTE_ALIGN(ring_size, IAVF_DMA_MEM_ALIGN);
- mz = rte_eth_dma_zone_reserve(dev, "iavf_tx_ring", queue_idx,
+ mz = rte_eth_dma_zone_reserve(dev, "ci_tx_ring", queue_idx,
ring_size, IAVF_RING_BASE_ALIGN,
socket_id);
if (!mz) {
@@ -840,7 +840,7 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
return -ENOMEM;
}
txq->tx_ring_dma = mz->iova;
- txq->iavf_tx_ring = (struct ci_tx_desc *)mz->addr;
+ txq->ci_tx_ring = (struct ci_tx_desc *)mz->addr;
txq->mz = mz;
reset_tx_queue(txq);
@@ -2334,7 +2334,7 @@ iavf_xmit_cleanup(struct ci_tx_queue *txq)
uint16_t desc_to_clean_to;
uint16_t nb_tx_to_clean;
- volatile struct ci_tx_desc *txd = txq->iavf_tx_ring;
+ volatile struct ci_tx_desc *txd = txq->ci_tx_ring;
desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
if (desc_to_clean_to >= nb_tx_desc)
@@ -2757,7 +2757,7 @@ uint16_t
iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
struct ci_tx_queue *txq = tx_queue;
- volatile struct ci_tx_desc *txr = txq->iavf_tx_ring;
+ volatile struct ci_tx_desc *txr = txq->ci_tx_ring;
struct ci_tx_entry *txe_ring = txq->sw_ring;
struct ci_tx_entry *txe, *txn;
struct rte_mbuf *mb, *mb_seg;
@@ -4504,7 +4504,7 @@ iavf_dev_tx_desc_status(void *tx_queue, uint16_t offset)
desc -= txq->nb_tx_desc;
}
- status = &txq->iavf_tx_ring[desc].cmd_type_offset_bsz;
+ status = &txq->ci_tx_ring[desc].cmd_type_offset_bsz;
mask = rte_le_to_cpu_64(IAVF_TXD_QW1_DTYPE_MASK);
expect = rte_cpu_to_le_64(
IAVF_TX_DESC_DTYPE_DESC_DONE << IAVF_TXD_QW1_DTYPE_SHIFT);
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
index c3d7083230..82861b8398 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
@@ -1729,7 +1729,7 @@ iavf_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
nb_commit = nb_pkts;
tx_id = txq->tx_tail;
- txdp = &txq->iavf_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
@@ -1750,7 +1750,7 @@ iavf_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
/* avoid reach the end of ring */
- txdp = &txq->iavf_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
}
@@ -1760,7 +1760,7 @@ iavf_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->iavf_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+ txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)IAVF_TX_DESC_CMD_RS) <<
IAVF_TXD_QW1_CMD_SHIFT);
txq->tx_next_rs =
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
index d79d96c7b7..ad1b0b90cd 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
@@ -2219,7 +2219,7 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
nb_commit = nb_pkts;
tx_id = txq->tx_tail;
- txdp = &txq->iavf_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = (void *)txq->sw_ring;
txep += tx_id;
@@ -2241,7 +2241,7 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
/* avoid reach the end of ring */
- txdp = &txq->iavf_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = (void *)txq->sw_ring;
txep += tx_id;
}
@@ -2252,7 +2252,7 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->iavf_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+ txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)IAVF_TX_DESC_CMD_RS) <<
IAVF_TXD_QW1_CMD_SHIFT);
txq->tx_next_rs =
@@ -2288,7 +2288,7 @@ iavf_xmit_fixed_burst_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
nb_pkts = nb_commit >> 1;
tx_id = txq->tx_tail;
- txdp = &txq->iavf_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = (void *)txq->sw_ring;
txep += (tx_id >> 1);
@@ -2309,7 +2309,7 @@ iavf_xmit_fixed_burst_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
tx_id = 0;
/* avoid reach the end of ring */
- txdp = txq->iavf_tx_ring;
+ txdp = txq->ci_tx_ring;
txep = (void *)txq->sw_ring;
}
@@ -2320,7 +2320,7 @@ iavf_xmit_fixed_burst_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->iavf_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+ txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)IAVF_TX_DESC_CMD_RS) <<
IAVF_TXD_QW1_CMD_SHIFT);
txq->tx_next_rs =
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
index f1ea57034f..1832b76f89 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
@@ -14,7 +14,7 @@
static inline int
iavf_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
{
- return (txq->iavf_tx_ring[idx].cmd_type_offset_bsz &
+ return (txq->ci_tx_ring[idx].cmd_type_offset_bsz &
rte_cpu_to_le_64(IAVF_TXD_QW1_DTYPE_MASK)) ==
rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE);
}
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
index cb086cd352..89ec05fa5d 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
@@ -1286,7 +1286,7 @@ iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
nb_commit = nb_pkts;
tx_id = txq->tx_tail;
- txdp = &txq->iavf_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
@@ -1306,7 +1306,7 @@ iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
/* avoid reach the end of ring */
- txdp = &txq->iavf_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
}
@@ -1316,7 +1316,7 @@ iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->iavf_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+ txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)IAVF_TX_DESC_CMD_RS) <<
IAVF_TXD_QW1_CMD_SHIFT);
txq->tx_next_rs =
diff --git a/drivers/net/intel/ice/ice_dcf_ethdev.c b/drivers/net/intel/ice/ice_dcf_ethdev.c
index ab1d499cef..5f537b4c12 100644
--- a/drivers/net/intel/ice/ice_dcf_ethdev.c
+++ b/drivers/net/intel/ice/ice_dcf_ethdev.c
@@ -401,11 +401,11 @@ reset_tx_queue(struct ci_tx_queue *txq)
txe = txq->sw_ring;
size = sizeof(struct ci_tx_desc) * txq->nb_tx_desc;
for (i = 0; i < size; i++)
- ((volatile char *)txq->ice_tx_ring)[i] = 0;
+ ((volatile char *)txq->ci_tx_ring)[i] = 0;
prev = (uint16_t)(txq->nb_tx_desc - 1);
for (i = 0; i < txq->nb_tx_desc; i++) {
- txq->ice_tx_ring[i].cmd_type_offset_bsz =
+ txq->ci_tx_ring[i].cmd_type_offset_bsz =
rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE);
txe[i].mbuf = NULL;
txe[i].last_id = i;
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 7358a95ce1..4aded194ce 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -1113,11 +1113,11 @@ ice_reset_tx_queue(struct ci_tx_queue *txq)
txe = txq->sw_ring;
size = sizeof(struct ci_tx_desc) * txq->nb_tx_desc;
for (i = 0; i < size; i++)
- ((volatile char *)txq->ice_tx_ring)[i] = 0;
+ ((volatile char *)txq->ci_tx_ring)[i] = 0;
prev = (uint16_t)(txq->nb_tx_desc - 1);
for (i = 0; i < txq->nb_tx_desc; i++) {
- volatile struct ci_tx_desc *txd = &txq->ice_tx_ring[i];
+ volatile struct ci_tx_desc *txd = &txq->ci_tx_ring[i];
txd->cmd_type_offset_bsz =
rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DESC_DONE);
@@ -1611,7 +1611,7 @@ ice_tx_queue_setup(struct rte_eth_dev *dev,
/* Allocate TX hardware ring descriptors. */
ring_size = sizeof(struct ci_tx_desc) * ICE_MAX_NUM_DESC_BY_MAC(hw);
ring_size = RTE_ALIGN(ring_size, ICE_DMA_MEM_ALIGN);
- tz = rte_eth_dma_zone_reserve(dev, "ice_tx_ring", queue_idx,
+ tz = rte_eth_dma_zone_reserve(dev, "ci_tx_ring", queue_idx,
ring_size, ICE_RING_BASE_ALIGN,
socket_id);
if (!tz) {
@@ -1633,7 +1633,7 @@ ice_tx_queue_setup(struct rte_eth_dev *dev,
txq->tx_deferred_start = tx_conf->tx_deferred_start;
txq->tx_ring_dma = tz->iova;
- txq->ice_tx_ring = tz->addr;
+ txq->ci_tx_ring = tz->addr;
/* Allocate software ring */
txq->sw_ring =
@@ -2547,7 +2547,7 @@ ice_tx_descriptor_status(void *tx_queue, uint16_t offset)
desc -= txq->nb_tx_desc;
}
- status = &txq->ice_tx_ring[desc].cmd_type_offset_bsz;
+ status = &txq->ci_tx_ring[desc].cmd_type_offset_bsz;
mask = rte_cpu_to_le_64(ICE_TXD_QW1_DTYPE_M);
expect = rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DESC_DONE <<
ICE_TXD_QW1_DTYPE_S);
@@ -2630,7 +2630,7 @@ ice_fdir_setup_tx_resources(struct ice_pf *pf)
txq->ice_vsi = pf->fdir.fdir_vsi;
txq->tx_ring_dma = tz->iova;
- txq->ice_tx_ring = (struct ci_tx_desc *)tz->addr;
+ txq->ci_tx_ring = (struct ci_tx_desc *)tz->addr;
/*
* don't need to allocate software ring and reset for the fdir
* program queue just set the queue has been configured.
@@ -3019,7 +3019,7 @@ static inline int
ice_xmit_cleanup(struct ci_tx_queue *txq)
{
struct ci_tx_entry *sw_ring = txq->sw_ring;
- volatile struct ci_tx_desc *txd = txq->ice_tx_ring;
+ volatile struct ci_tx_desc *txd = txq->ci_tx_ring;
uint16_t last_desc_cleaned = txq->last_desc_cleaned;
uint16_t nb_tx_desc = txq->nb_tx_desc;
uint16_t desc_to_clean_to;
@@ -3140,7 +3140,7 @@ uint16_t
ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
struct ci_tx_queue *txq;
- volatile struct ci_tx_desc *ice_tx_ring;
+ volatile struct ci_tx_desc *ci_tx_ring;
volatile struct ci_tx_desc *txd;
struct ci_tx_entry *sw_ring;
struct ci_tx_entry *txe, *txn;
@@ -3163,7 +3163,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
txq = tx_queue;
sw_ring = txq->sw_ring;
- ice_tx_ring = txq->ice_tx_ring;
+ ci_tx_ring = txq->ci_tx_ring;
tx_id = txq->tx_tail;
txe = &sw_ring[tx_id];
@@ -3249,7 +3249,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
/* Setup TX context descriptor if required */
volatile struct ice_tx_ctx_desc *ctx_txd =
(volatile struct ice_tx_ctx_desc *)
- &ice_tx_ring[tx_id];
+ &ci_tx_ring[tx_id];
uint16_t cd_l2tag2 = 0;
uint64_t cd_type_cmd_tso_mss = ICE_TX_DESC_DTYPE_CTX;
@@ -3291,7 +3291,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
m_seg = tx_pkt;
do {
- txd = &ice_tx_ring[tx_id];
+ txd = &ci_tx_ring[tx_id];
txn = &sw_ring[txe->next_id];
if (txe->mbuf)
@@ -3319,7 +3319,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
txe->last_id = tx_last;
tx_id = txe->next_id;
txe = txn;
- txd = &ice_tx_ring[tx_id];
+ txd = &ci_tx_ring[tx_id];
txn = &sw_ring[txe->next_id];
}
@@ -3402,7 +3402,7 @@ ice_tx_free_bufs(struct ci_tx_queue *txq)
struct ci_tx_entry *txep;
uint16_t i;
- if ((txq->ice_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+ if ((txq->ci_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
rte_cpu_to_le_64(ICE_TXD_QW1_DTYPE_M)) !=
rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DESC_DONE))
return 0;
@@ -3575,7 +3575,7 @@ static inline void
ice_tx_fill_hw_ring(struct ci_tx_queue *txq, struct rte_mbuf **pkts,
uint16_t nb_pkts)
{
- volatile struct ci_tx_desc *txdp = &txq->ice_tx_ring[txq->tx_tail];
+ volatile struct ci_tx_desc *txdp = &txq->ci_tx_ring[txq->tx_tail];
struct ci_tx_entry *txep = &txq->sw_ring[txq->tx_tail];
const int N_PER_LOOP = 4;
const int N_PER_LOOP_MASK = N_PER_LOOP - 1;
@@ -3608,7 +3608,7 @@ tx_xmit_pkts(struct ci_tx_queue *txq,
struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
{
- volatile struct ci_tx_desc *txr = txq->ice_tx_ring;
+ volatile struct ci_tx_desc *txr = txq->ci_tx_ring;
uint16_t n = 0;
/**
@@ -4896,11 +4896,11 @@ ice_fdir_programming(struct ice_pf *pf, struct ice_fltr_desc *fdir_desc)
uint16_t i;
fdirdp = (volatile struct ice_fltr_desc *)
- (&txq->ice_tx_ring[txq->tx_tail]);
+ (&txq->ci_tx_ring[txq->tx_tail]);
fdirdp->qidx_compq_space_stat = fdir_desc->qidx_compq_space_stat;
fdirdp->dtype_cmd_vsi_fdid = fdir_desc->dtype_cmd_vsi_fdid;
- txdp = &txq->ice_tx_ring[txq->tx_tail + 1];
+ txdp = &txq->ci_tx_ring[txq->tx_tail + 1];
txdp->buffer_addr = rte_cpu_to_le_64(pf->fdir.dma_addr);
td_cmd = ICE_TX_DESC_CMD_EOP |
ICE_TX_DESC_CMD_RS |
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
index 95c4f4569c..d553c438f8 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
@@ -869,7 +869,7 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
return 0;
tx_id = txq->tx_tail;
- txdp = &txq->ice_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
@@ -890,7 +890,7 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
/* avoid reach the end of ring */
- txdp = &txq->ice_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
}
@@ -900,7 +900,7 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->ice_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+ txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)ICE_TX_DESC_CMD_RS) <<
ICE_TXD_QW1_CMD_S);
txq->tx_next_rs =
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
index 1f6bf5fc8e..d42f41461f 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
@@ -933,7 +933,7 @@ ice_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
return 0;
tx_id = txq->tx_tail;
- txdp = &txq->ice_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = (void *)txq->sw_ring;
txep += tx_id;
@@ -955,7 +955,7 @@ ice_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
/* avoid reach the end of ring */
- txdp = txq->ice_tx_ring;
+ txdp = txq->ci_tx_ring;
txep = (void *)txq->sw_ring;
}
@@ -965,7 +965,7 @@ ice_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->ice_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+ txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)ICE_TX_DESC_CMD_RS) <<
ICE_TXD_QW1_CMD_S);
txq->tx_next_rs =
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_common.h b/drivers/net/intel/ice/ice_rxtx_vec_common.h
index ff46a8fb49..8ba591e403 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_common.h
+++ b/drivers/net/intel/ice/ice_rxtx_vec_common.h
@@ -11,7 +11,7 @@
static inline int
ice_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
{
- return (txq->ice_tx_ring[idx].cmd_type_offset_bsz &
+ return (txq->ci_tx_ring[idx].cmd_type_offset_bsz &
rte_cpu_to_le_64(ICE_TXD_QW1_DTYPE_M)) ==
rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DESC_DONE);
}
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_sse.c b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
index 44f3fc0fa5..c65240d659 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
@@ -642,7 +642,7 @@ ice_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
return 0;
tx_id = txq->tx_tail;
- txdp = &txq->ice_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
@@ -662,7 +662,7 @@ ice_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
/* avoid reach the end of ring */
- txdp = &txq->ice_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
}
@@ -672,7 +672,7 @@ ice_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->ice_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+ txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)ICE_TX_DESC_CMD_RS) <<
ICE_TXD_QW1_CMD_S);
txq->tx_next_rs =
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.c b/drivers/net/intel/idpf/idpf_common_rxtx.c
index be3c1ef216..51074bda3a 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.c
@@ -266,11 +266,11 @@ idpf_qc_single_tx_queue_reset(struct ci_tx_queue *txq)
txe = txq->sw_ring;
size = sizeof(struct ci_tx_desc) * txq->nb_tx_desc;
for (i = 0; i < size; i++)
- ((volatile char *)txq->idpf_tx_ring)[i] = 0;
+ ((volatile char *)txq->ci_tx_ring)[i] = 0;
prev = (uint16_t)(txq->nb_tx_desc - 1);
for (i = 0; i < txq->nb_tx_desc; i++) {
- txq->idpf_tx_ring[i].cmd_type_offset_bsz =
+ txq->ci_tx_ring[i].cmd_type_offset_bsz =
rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
txe[i].mbuf = NULL;
txe[i].last_id = i;
@@ -1335,7 +1335,7 @@ idpf_xmit_cleanup(struct ci_tx_queue *txq)
uint16_t desc_to_clean_to;
uint16_t nb_tx_to_clean;
- volatile struct ci_tx_desc *txd = txq->idpf_tx_ring;
+ volatile struct ci_tx_desc *txd = txq->ci_tx_ring;
desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
if (desc_to_clean_to >= nb_tx_desc)
@@ -1398,7 +1398,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
return nb_tx;
sw_ring = txq->sw_ring;
- txr = txq->idpf_tx_ring;
+ txr = txq->ci_tx_ring;
tx_id = txq->tx_tail;
txe = &sw_ring[tx_id];
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c b/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c
index 5f5d538dcb..04efee3722 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c
@@ -573,7 +573,7 @@ idpf_singleq_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts
return 0;
tx_id = txq->tx_tail;
- txdp = &txq->idpf_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
@@ -594,7 +594,7 @@ idpf_singleq_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
/* avoid reach the end of ring */
- txdp = &txq->idpf_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = &txq->sw_ring_vec[tx_id];
}
@@ -604,7 +604,7 @@ idpf_singleq_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->idpf_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+ txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
IDPF_TXD_QW1_CMD_S);
txq->tx_next_rs =
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c b/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c
index c1ec3d1222..d5e5a2ca5f 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c
@@ -1090,7 +1090,7 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
return 0;
tx_id = txq->tx_tail;
- txdp = &txq->idpf_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = (void *)txq->sw_ring;
txep += tx_id;
@@ -1112,7 +1112,7 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
/* avoid reach the end of ring */
- txdp = &txq->idpf_tx_ring[tx_id];
+ txdp = &txq->ci_tx_ring[tx_id];
txep = (void *)txq->sw_ring;
txep += tx_id;
}
@@ -1123,7 +1123,7 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
- txq->idpf_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+ txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
IDPF_TXD_QW1_CMD_S);
txq->tx_next_rs =
diff --git a/drivers/net/intel/idpf/idpf_rxtx.c b/drivers/net/intel/idpf/idpf_rxtx.c
index 9b63e44341..e974eb44b0 100644
--- a/drivers/net/intel/idpf/idpf_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_rxtx.c
@@ -469,7 +469,7 @@ idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
}
if (!is_splitq) {
- txq->idpf_tx_ring = mz->addr;
+ txq->ci_tx_ring = mz->addr;
idpf_qc_single_tx_queue_reset(txq);
} else {
txq->desc_ring = mz->addr;
diff --git a/drivers/net/intel/idpf/idpf_rxtx_vec_common.h b/drivers/net/intel/idpf/idpf_rxtx_vec_common.h
index 4702061484..b5e8574667 100644
--- a/drivers/net/intel/idpf/idpf_rxtx_vec_common.h
+++ b/drivers/net/intel/idpf/idpf_rxtx_vec_common.h
@@ -31,7 +31,7 @@ idpf_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
if (txq->complq != NULL)
return 1;
- return (txq->idpf_tx_ring[idx].cmd_type_offset_bsz &
+ return (txq->ci_tx_ring[idx].cmd_type_offset_bsz &
rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) ==
rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
}
--
2.51.0
* [RFC PATCH 03/27] net/intel: create common post-Tx cleanup function
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 01/27] net/intel: create common Tx descriptor structure Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 02/27] net/intel: use common tx ring structure Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 04/27] net/intel: consolidate definitions for Tx desc fields Bruce Richardson
` (23 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev
Cc: Bruce Richardson, Vladimir Medvedkin, Anatoly Burakov,
Jingjing Wu, Praveen Shetty
The code used in the ice, iavf, idpf and i40e drivers for cleaning up
mbufs after they have been transmitted is identical. Therefore,
deduplicate it by moving it to the common code and removing the
driver-specific versions.
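
For reference, below is a minimal sketch of how a driver's scalar Tx path
is expected to use the new helper. It is illustrative only and not part of
the patch: the wrapper name example_reserve_descs() is invented for the
example, while ci_tx_xmit_cleanup() and the ci_tx_queue fields it touches
come from this series.

    /* assumes the common drivers/net/intel/common/tx.h header is included */
    static inline int
    example_reserve_descs(struct ci_tx_queue *txq, uint16_t nb_needed)
    {
            /* opportunistic cleanup once free descriptors drop below threshold */
            if (txq->nb_tx_free < txq->tx_free_thresh)
                    (void)ci_tx_xmit_cleanup(txq);

            /* keep cleaning until enough descriptors are free, or HW is not done */
            while (nb_needed > txq->nb_tx_free) {
                    if (ci_tx_xmit_cleanup(txq) != 0)
                            return -1; /* descriptors not yet written back by HW */
            }
            return 0;
    }

Each successful call advances last_desc_cleaned by tx_rs_thresh
descriptors, so the loop ends either when enough descriptors have been
reclaimed or when the hardware has not yet completed the next batch.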
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx.h | 53 ++++++++++++++++++++
drivers/net/intel/i40e/i40e_rxtx.c | 49 ++----------------
drivers/net/intel/iavf/iavf_rxtx.c | 50 ++-----------------
drivers/net/intel/ice/ice_rxtx.c | 60 ++---------------------
drivers/net/intel/idpf/idpf_common_rxtx.c | 46 ++---------------
5 files changed, 71 insertions(+), 187 deletions(-)
diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index a9ff3bebd5..5b87c15da0 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -249,6 +249,59 @@ ci_tx_free_bufs_vec(struct ci_tx_queue *txq, ci_desc_done_fn desc_done, bool ctx
return txq->tx_rs_thresh;
}
+/*
+ * Common transmit descriptor cleanup function for Intel drivers.
+ * Used by ice, i40e, iavf, and idpf drivers.
+ *
+ * Returns:
+ * 0 on success
+ * -1 if cleanup cannot proceed (descriptors not yet processed by HW)
+ */
+static __rte_always_inline int
+ci_tx_xmit_cleanup(struct ci_tx_queue *txq)
+{
+ struct ci_tx_entry *sw_ring = txq->sw_ring;
+ volatile struct ci_tx_desc *txd = txq->ci_tx_ring;
+ uint16_t last_desc_cleaned = txq->last_desc_cleaned;
+ uint16_t nb_tx_desc = txq->nb_tx_desc;
+ uint16_t desc_to_clean_to;
+ uint16_t nb_tx_to_clean;
+
+ /* Determine the last descriptor needing to be cleaned */
+ desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
+ if (desc_to_clean_to >= nb_tx_desc)
+ desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
+
+ /* Check to make sure the last descriptor to clean is done */
+ desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
+
+ /* Check if descriptor is done - all drivers use 0xF as done value in bits 3:0 */
+ if ((txd[desc_to_clean_to].cmd_type_offset_bsz & rte_cpu_to_le_64(0xFUL)) !=
+ rte_cpu_to_le_64(0xFUL)) {
+ /* Descriptor not yet processed by hardware */
+ return -1;
+ }
+
+ /* Figure out how many descriptors will be cleaned */
+ if (last_desc_cleaned > desc_to_clean_to)
+ nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) + desc_to_clean_to);
+ else
+ nb_tx_to_clean = (uint16_t)(desc_to_clean_to - last_desc_cleaned);
+
+ /* The last descriptor to clean is done, so that means all the
+ * descriptors from the last descriptor that was cleaned
+ * up to the last descriptor with the RS bit set
+ * are done. Only reset the threshold descriptor.
+ */
+ txd[desc_to_clean_to].cmd_type_offset_bsz = 0;
+
+ /* Update the txq to reflect the last descriptor that was cleaned */
+ txq->last_desc_cleaned = desc_to_clean_to;
+ txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
+
+ return 0;
+}
+
static inline void
ci_txq_release_all_mbufs(struct ci_tx_queue *txq, bool use_ctx)
{
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index 2af3098f81..880013a515 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -380,45 +380,6 @@ i40e_build_ctob(uint32_t td_cmd,
((uint64_t)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
}
-static inline int
-i40e_xmit_cleanup(struct ci_tx_queue *txq)
-{
- struct ci_tx_entry *sw_ring = txq->sw_ring;
- volatile struct ci_tx_desc *txd = txq->ci_tx_ring;
- uint16_t last_desc_cleaned = txq->last_desc_cleaned;
- uint16_t nb_tx_desc = txq->nb_tx_desc;
- uint16_t desc_to_clean_to;
- uint16_t nb_tx_to_clean;
-
- desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
- if (desc_to_clean_to >= nb_tx_desc)
- desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
-
- desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
- if ((txd[desc_to_clean_to].cmd_type_offset_bsz &
- rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
- rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)) {
- PMD_TX_LOG(DEBUG, "TX descriptor %4u is not done "
- "(port=%d queue=%d)", desc_to_clean_to,
- txq->port_id, txq->queue_id);
- return -1;
- }
-
- if (last_desc_cleaned > desc_to_clean_to)
- nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
- desc_to_clean_to);
- else
- nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
- last_desc_cleaned);
-
- txd[desc_to_clean_to].cmd_type_offset_bsz = 0;
-
- txq->last_desc_cleaned = desc_to_clean_to;
- txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
-
- return 0;
-}
-
static inline int
#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
check_rx_burst_bulk_alloc_preconditions(struct ci_rx_queue *rxq)
@@ -1114,7 +1075,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
/* Check if the descriptor ring needs to be cleaned. */
if (txq->nb_tx_free < txq->tx_free_thresh)
- (void)i40e_xmit_cleanup(txq);
+ (void)ci_tx_xmit_cleanup(txq);
for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
td_cmd = 0;
@@ -1155,14 +1116,14 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
if (nb_used > txq->nb_tx_free) {
- if (i40e_xmit_cleanup(txq) != 0) {
+ if (ci_tx_xmit_cleanup(txq) != 0) {
if (nb_tx == 0)
return 0;
goto end_of_tx;
}
if (unlikely(nb_used > txq->tx_rs_thresh)) {
while (nb_used > txq->nb_tx_free) {
- if (i40e_xmit_cleanup(txq) != 0) {
+ if (ci_tx_xmit_cleanup(txq) != 0) {
if (nb_tx == 0)
return 0;
goto end_of_tx;
@@ -2794,7 +2755,7 @@ i40e_tx_done_cleanup_full(struct ci_tx_queue *txq,
tx_last = txq->tx_tail;
tx_id = swr_ring[tx_last].next_id;
- if (txq->nb_tx_free == 0 && i40e_xmit_cleanup(txq))
+ if (txq->nb_tx_free == 0 && ci_tx_xmit_cleanup(txq))
return 0;
nb_tx_to_clean = txq->nb_tx_free;
@@ -2828,7 +2789,7 @@ i40e_tx_done_cleanup_full(struct ci_tx_queue *txq,
break;
if (pkt_cnt < free_cnt) {
- if (i40e_xmit_cleanup(txq))
+ if (ci_tx_xmit_cleanup(txq))
break;
nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index 2ed778a872..4605523673 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -2325,46 +2325,6 @@ iavf_recv_pkts_bulk_alloc(void *rx_queue,
return nb_rx;
}
-static inline int
-iavf_xmit_cleanup(struct ci_tx_queue *txq)
-{
- struct ci_tx_entry *sw_ring = txq->sw_ring;
- uint16_t last_desc_cleaned = txq->last_desc_cleaned;
- uint16_t nb_tx_desc = txq->nb_tx_desc;
- uint16_t desc_to_clean_to;
- uint16_t nb_tx_to_clean;
-
- volatile struct ci_tx_desc *txd = txq->ci_tx_ring;
-
- desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
- if (desc_to_clean_to >= nb_tx_desc)
- desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
-
- desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
- if ((txd[desc_to_clean_to].cmd_type_offset_bsz &
- rte_cpu_to_le_64(IAVF_TXD_QW1_DTYPE_MASK)) !=
- rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE)) {
- PMD_TX_LOG(DEBUG, "TX descriptor %4u is not done "
- "(port=%d queue=%d)", desc_to_clean_to,
- txq->port_id, txq->queue_id);
- return -1;
- }
-
- if (last_desc_cleaned > desc_to_clean_to)
- nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
- desc_to_clean_to);
- else
- nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
- last_desc_cleaned);
-
- txd[desc_to_clean_to].cmd_type_offset_bsz = 0;
-
- txq->last_desc_cleaned = desc_to_clean_to;
- txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
-
- return 0;
-}
-
/* Check if the context descriptor is needed for TX offloading */
static inline uint16_t
iavf_calc_context_desc(struct rte_mbuf *mb, uint8_t vlan_flag)
@@ -2769,7 +2729,7 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
/* Check if the descriptor ring needs to be cleaned. */
if (txq->nb_tx_free < txq->tx_free_thresh)
- iavf_xmit_cleanup(txq);
+ ci_tx_xmit_cleanup(txq);
desc_idx = txq->tx_tail;
txe = &txe_ring[desc_idx];
@@ -2824,14 +2784,14 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
txq->port_id, txq->queue_id, desc_idx, desc_idx_last);
if (nb_desc_required > txq->nb_tx_free) {
- if (iavf_xmit_cleanup(txq)) {
+ if (ci_tx_xmit_cleanup(txq)) {
if (idx == 0)
return 0;
goto end_of_tx;
}
if (unlikely(nb_desc_required > txq->tx_rs_thresh)) {
while (nb_desc_required > txq->nb_tx_free) {
- if (iavf_xmit_cleanup(txq)) {
+ if (ci_tx_xmit_cleanup(txq)) {
if (idx == 0)
return 0;
goto end_of_tx;
@@ -4342,7 +4302,7 @@ iavf_tx_done_cleanup_full(struct ci_tx_queue *txq,
tx_id = txq->tx_tail;
tx_last = tx_id;
- if (txq->nb_tx_free == 0 && iavf_xmit_cleanup(txq))
+ if (txq->nb_tx_free == 0 && ci_tx_xmit_cleanup(txq))
return 0;
nb_tx_to_clean = txq->nb_tx_free;
@@ -4374,7 +4334,7 @@ iavf_tx_done_cleanup_full(struct ci_tx_queue *txq,
break;
if (pkt_cnt < free_cnt) {
- if (iavf_xmit_cleanup(txq))
+ if (ci_tx_xmit_cleanup(txq))
break;
nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 4aded194ce..0a6ca993c6 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -3015,56 +3015,6 @@ ice_txd_enable_checksum(uint64_t ol_flags,
}
}
-static inline int
-ice_xmit_cleanup(struct ci_tx_queue *txq)
-{
- struct ci_tx_entry *sw_ring = txq->sw_ring;
- volatile struct ci_tx_desc *txd = txq->ci_tx_ring;
- uint16_t last_desc_cleaned = txq->last_desc_cleaned;
- uint16_t nb_tx_desc = txq->nb_tx_desc;
- uint16_t desc_to_clean_to;
- uint16_t nb_tx_to_clean;
-
- /* Determine the last descriptor needing to be cleaned */
- desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
- if (desc_to_clean_to >= nb_tx_desc)
- desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
-
- /* Check to make sure the last descriptor to clean is done */
- desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
- if (!(txd[desc_to_clean_to].cmd_type_offset_bsz &
- rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DESC_DONE))) {
- PMD_TX_LOG(DEBUG, "TX descriptor %4u is not done "
- "(port=%d queue=%d) value=0x%"PRIx64,
- desc_to_clean_to,
- txq->port_id, txq->queue_id,
- txd[desc_to_clean_to].cmd_type_offset_bsz);
- /* Failed to clean any descriptors */
- return -1;
- }
-
- /* Figure out how many descriptors will be cleaned */
- if (last_desc_cleaned > desc_to_clean_to)
- nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
- desc_to_clean_to);
- else
- nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
- last_desc_cleaned);
-
- /* The last descriptor to clean is done, so that means all the
- * descriptors from the last descriptor that was cleaned
- * up to the last descriptor with the RS bit set
- * are done. Only reset the threshold descriptor.
- */
- txd[desc_to_clean_to].cmd_type_offset_bsz = 0;
-
- /* Update the txq to reflect the last descriptor that was cleaned */
- txq->last_desc_cleaned = desc_to_clean_to;
- txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
-
- return 0;
-}
-
/* Construct the tx flags */
static inline uint64_t
ice_build_ctob(uint32_t td_cmd,
@@ -3172,7 +3122,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
/* Check if the descriptor ring needs to be cleaned. */
if (txq->nb_tx_free < txq->tx_free_thresh)
- (void)ice_xmit_cleanup(txq);
+ (void)ci_tx_xmit_cleanup(txq);
for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
tx_pkt = *tx_pkts++;
@@ -3209,14 +3159,14 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
if (nb_used > txq->nb_tx_free) {
- if (ice_xmit_cleanup(txq) != 0) {
+ if (ci_tx_xmit_cleanup(txq) != 0) {
if (nb_tx == 0)
return 0;
goto end_of_tx;
}
if (unlikely(nb_used > txq->tx_rs_thresh)) {
while (nb_used > txq->nb_tx_free) {
- if (ice_xmit_cleanup(txq) != 0) {
+ if (ci_tx_xmit_cleanup(txq) != 0) {
if (nb_tx == 0)
return 0;
goto end_of_tx;
@@ -3446,7 +3396,7 @@ ice_tx_done_cleanup_full(struct ci_tx_queue *txq,
tx_last = txq->tx_tail;
tx_id = swr_ring[tx_last].next_id;
- if (txq->nb_tx_free == 0 && ice_xmit_cleanup(txq))
+ if (txq->nb_tx_free == 0 && ci_tx_xmit_cleanup(txq))
return 0;
nb_tx_to_clean = txq->nb_tx_free;
@@ -3480,7 +3430,7 @@ ice_tx_done_cleanup_full(struct ci_tx_queue *txq,
break;
if (pkt_cnt < free_cnt) {
- if (ice_xmit_cleanup(txq))
+ if (ci_tx_xmit_cleanup(txq))
break;
nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.c b/drivers/net/intel/idpf/idpf_common_rxtx.c
index 51074bda3a..23666539ab 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.c
@@ -1326,46 +1326,6 @@ idpf_dp_singleq_recv_scatter_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
return nb_rx;
}
-static inline int
-idpf_xmit_cleanup(struct ci_tx_queue *txq)
-{
- uint16_t last_desc_cleaned = txq->last_desc_cleaned;
- struct ci_tx_entry *sw_ring = txq->sw_ring;
- uint16_t nb_tx_desc = txq->nb_tx_desc;
- uint16_t desc_to_clean_to;
- uint16_t nb_tx_to_clean;
-
- volatile struct ci_tx_desc *txd = txq->ci_tx_ring;
-
- desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
- if (desc_to_clean_to >= nb_tx_desc)
- desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
-
- desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
- if ((txd[desc_to_clean_to].cmd_type_offset_bsz &
- rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
- rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
- TX_LOG(DEBUG, "TX descriptor %4u is not done "
- "(port=%d queue=%d)", desc_to_clean_to,
- txq->port_id, txq->queue_id);
- return -1;
- }
-
- if (last_desc_cleaned > desc_to_clean_to)
- nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
- desc_to_clean_to);
- else
- nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
- last_desc_cleaned);
-
- txd[desc_to_clean_to].cmd_type_offset_bsz = 0;
-
- txq->last_desc_cleaned = desc_to_clean_to;
- txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
-
- return 0;
-}
-
/* TX function */
RTE_EXPORT_INTERNAL_SYMBOL(idpf_dp_singleq_xmit_pkts)
uint16_t
@@ -1404,7 +1364,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
/* Check if the descriptor ring needs to be cleaned. */
if (txq->nb_tx_free < txq->tx_free_thresh)
- (void)idpf_xmit_cleanup(txq);
+ (void)ci_tx_xmit_cleanup(txq);
for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
td_cmd = 0;
@@ -1437,14 +1397,14 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->port_id, txq->queue_id, tx_id, tx_last);
if (nb_used > txq->nb_tx_free) {
- if (idpf_xmit_cleanup(txq) != 0) {
+ if (ci_tx_xmit_cleanup(txq) != 0) {
if (nb_tx == 0)
return 0;
goto end_of_tx;
}
if (unlikely(nb_used > txq->tx_rs_thresh)) {
while (nb_used > txq->nb_tx_free) {
- if (idpf_xmit_cleanup(txq) != 0) {
+ if (ci_tx_xmit_cleanup(txq) != 0) {
if (nb_tx == 0)
return 0;
goto end_of_tx;
--
2.51.0
* [RFC PATCH 04/27] net/intel: consolidate definitions for Tx desc fields
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (2 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 03/27] net/intel: create common post-Tx cleanup function Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 05/27] net/intel: create separate header for Tx scalar fns Bruce Richardson
` (22 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev
Cc: Bruce Richardson, Vladimir Medvedkin, Anatoly Burakov,
Jingjing Wu, Praveen Shetty
The offsets of the various fields within the Tx descriptors are common
to i40e, iavf, ice and idpf, so put a single set of defines in tx.h and
use those throughout all drivers. (NOTE: there was a small difference in
the mask of the CMD field between drivers, depending on whether or not
reserved fields were included. This can be ignored, as those bits are
unused in the drivers for which they are reserved.) Similarly, the
various flag fields, such as End-of-packet (EOP) and Report-status (RS),
are the same, as are the offload definitions, so consolidate them too.
The original definitions are in base code, so they are left in place,
but they are now unused.
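
As an illustration (not part of the patch), the consolidated CI_* macros
compose the 64-bit cmd_type_offset_bsz quadword exactly as the per-driver
macros did before. The helper below follows the i40e_build_ctob() pattern
from the diff; the name example_build_ctob() is invented for the example.

    /* assumes drivers/net/intel/common/tx.h and rte_byteorder.h are included */
    static inline uint64_t
    example_build_ctob(uint32_t td_cmd, uint32_t td_offset,
                       unsigned int size, uint32_t td_tag)
    {
            /* DTYPE in bits 3:0, CMD at bit 4, OFFSET at bit 16,
             * buffer size at bit 34, L2TAG1 at bit 48 */
            return rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
                    ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
                    ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
                    ((uint64_t)size << CI_TXD_QW1_TX_BUF_SZ_S) |
                    ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
    }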
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx.h | 64 +++++++-
drivers/net/intel/i40e/i40e_fdir.c | 24 +--
drivers/net/intel/i40e/i40e_rxtx.c | 92 ++++++------
drivers/net/intel/i40e/i40e_rxtx.h | 17 +--
.../net/intel/i40e/i40e_rxtx_vec_altivec.c | 11 +-
drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c | 22 ++-
drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c | 38 ++---
drivers/net/intel/i40e/i40e_rxtx_vec_common.h | 4 +-
drivers/net/intel/i40e/i40e_rxtx_vec_neon.c | 11 +-
drivers/net/intel/i40e/i40e_rxtx_vec_sse.c | 11 +-
drivers/net/intel/iavf/iavf_rxtx.c | 68 +++++----
drivers/net/intel/iavf/iavf_rxtx.h | 20 +--
drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c | 41 ++----
drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c | 80 ++++------
drivers/net/intel/iavf/iavf_rxtx_vec_common.h | 34 ++---
drivers/net/intel/iavf/iavf_rxtx_vec_sse.c | 15 +-
drivers/net/intel/ice/ice_dcf_ethdev.c | 2 +-
drivers/net/intel/ice/ice_rxtx.c | 137 ++++++++----------
drivers/net/intel/ice/ice_rxtx.h | 15 +-
drivers/net/intel/ice/ice_rxtx_vec_avx2.c | 41 ++----
drivers/net/intel/ice/ice_rxtx_vec_avx512.c | 39 ++---
drivers/net/intel/ice/ice_rxtx_vec_common.h | 41 +++---
drivers/net/intel/ice/ice_rxtx_vec_sse.c | 12 +-
drivers/net/intel/idpf/idpf_common_rxtx.c | 22 +--
drivers/net/intel/idpf/idpf_common_rxtx.h | 12 --
.../net/intel/idpf/idpf_common_rxtx_avx2.c | 41 ++----
.../net/intel/idpf/idpf_common_rxtx_avx512.c | 41 ++----
drivers/net/intel/idpf/idpf_rxtx_vec_common.h | 4 +-
28 files changed, 424 insertions(+), 535 deletions(-)
diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index 5b87c15da0..3d3d9ad8e3 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -10,6 +10,66 @@
#include <rte_ethdev.h>
#include <rte_vect.h>
+/* Common TX Descriptor QW1 Field Definitions */
+#define CI_TXD_QW1_DTYPE_S 0
+#define CI_TXD_QW1_DTYPE_M (0xFUL << CI_TXD_QW1_DTYPE_S)
+#define CI_TXD_QW1_CMD_S 4
+#define CI_TXD_QW1_CMD_M (0xFFFUL << CI_TXD_QW1_CMD_S)
+#define CI_TXD_QW1_OFFSET_S 16
+#define CI_TXD_QW1_OFFSET_M (0x3FFFFULL << CI_TXD_QW1_OFFSET_S)
+#define CI_TXD_QW1_TX_BUF_SZ_S 34
+#define CI_TXD_QW1_TX_BUF_SZ_M (0x3FFFULL << CI_TXD_QW1_TX_BUF_SZ_S)
+#define CI_TXD_QW1_L2TAG1_S 48
+#define CI_TXD_QW1_L2TAG1_M (0xFFFFULL << CI_TXD_QW1_L2TAG1_S)
+
+/* Common Descriptor Types */
+#define CI_TX_DESC_DTYPE_DATA 0x0
+#define CI_TX_DESC_DTYPE_CTX 0x1
+#define CI_TX_DESC_DTYPE_DESC_DONE 0xF
+
+/* Common TX Descriptor Command Flags */
+#define CI_TX_DESC_CMD_EOP 0x0001
+#define CI_TX_DESC_CMD_RS 0x0002
+#define CI_TX_DESC_CMD_ICRC 0x0004
+#define CI_TX_DESC_CMD_IL2TAG1 0x0008
+#define CI_TX_DESC_CMD_DUMMY 0x0010
+#define CI_TX_DESC_CMD_IIPT_IPV6 0x0020
+#define CI_TX_DESC_CMD_IIPT_IPV4 0x0040
+#define CI_TX_DESC_CMD_IIPT_IPV4_CSUM 0x0060
+#define CI_TX_DESC_CMD_L4T_EOFT_TCP 0x0100
+#define CI_TX_DESC_CMD_L4T_EOFT_SCTP 0x0200
+#define CI_TX_DESC_CMD_L4T_EOFT_UDP 0x0300
+
+/* Common TX Context Descriptor Commands */
+#define CI_TX_CTX_DESC_TSO 0x01
+#define CI_TX_CTX_DESC_TSYN 0x02
+#define CI_TX_CTX_DESC_IL2TAG2 0x04
+
+/* Common TX Descriptor Length Field Shifts */
+#define CI_TX_DESC_LEN_MACLEN_S 0 /* 7 BITS */
+#define CI_TX_DESC_LEN_IPLEN_S 7 /* 7 BITS */
+#define CI_TX_DESC_LEN_L4_LEN_S 14 /* 4 BITS */
+
+/* Common maximum data per TX descriptor */
+#define CI_MAX_DATA_PER_TXD (CI_TXD_QW1_TX_BUF_SZ_M >> CI_TXD_QW1_TX_BUF_SZ_S)
+
+/**
+ * Common TX offload union for Intel drivers.
+ * Supports both basic offloads (l2_len, l3_len, l4_len, tso_segsz) and
+ * extended offloads (outer_l2_len, outer_l3_len) for tunneling support.
+ */
+union ci_tx_offload {
+ uint64_t data;
+ struct {
+ uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+ uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+ uint64_t l4_len:8; /**< L4 Header Length. */
+ uint64_t tso_segsz:16; /**< TCP TSO segment size */
+ uint64_t outer_l2_len:8; /**< outer L2 Header Length */
+ uint64_t outer_l3_len:16; /**< outer L3 Header Length */
+ };
+};
+
/*
* Structure of a 16-byte Tx descriptor common across i40e, ice, iavf and idpf drivers
*/
@@ -276,8 +336,8 @@ ci_tx_xmit_cleanup(struct ci_tx_queue *txq)
desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
/* Check if descriptor is done - all drivers use 0xF as done value in bits 3:0 */
- if ((txd[desc_to_clean_to].cmd_type_offset_bsz & rte_cpu_to_le_64(0xFUL)) !=
- rte_cpu_to_le_64(0xFUL)) {
+ if ((txd[desc_to_clean_to].cmd_type_offset_bsz & rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE)) {
/* Descriptor not yet processed by hardware */
return -1;
}
diff --git a/drivers/net/intel/i40e/i40e_fdir.c b/drivers/net/intel/i40e/i40e_fdir.c
index 8a01aec0e2..3b099d5a9e 100644
--- a/drivers/net/intel/i40e/i40e_fdir.c
+++ b/drivers/net/intel/i40e/i40e_fdir.c
@@ -916,11 +916,11 @@ i40e_build_ctob(uint32_t td_cmd,
unsigned int size,
uint32_t td_tag)
{
- return rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DATA |
- ((uint64_t)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
- ((uint64_t)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
- ((uint64_t)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
- ((uint64_t)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
+ return rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
+ ((uint64_t)size << CI_TXD_QW1_TX_BUF_SZ_S) |
+ ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
}
/*
@@ -1384,8 +1384,8 @@ i40e_find_available_buffer(struct rte_eth_dev *dev)
do {
if ((tmp_txdp->cmd_type_offset_bsz &
- rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
- rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+ rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) ==
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE))
fdir_info->txq_available_buf_count++;
else
break;
@@ -1710,9 +1710,9 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
txdp = &txq->ci_tx_ring[txq->tx_tail + 1];
txdp->buffer_addr = rte_cpu_to_le_64(pf->fdir.dma_addr[txq->tx_tail >> 1]);
- td_cmd = I40E_TX_DESC_CMD_EOP |
- I40E_TX_DESC_CMD_RS |
- I40E_TX_DESC_CMD_DUMMY;
+ td_cmd = CI_TX_DESC_CMD_EOP |
+ CI_TX_DESC_CMD_RS |
+ CI_TX_DESC_CMD_DUMMY;
txdp->cmd_type_offset_bsz =
i40e_build_ctob(td_cmd, 0, I40E_FDIR_PKT_LEN, 0);
@@ -1731,8 +1731,8 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
if (wait_status) {
for (i = 0; i < I40E_FDIR_MAX_WAIT_US; i++) {
if ((txdp->cmd_type_offset_bsz &
- rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
- rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+ rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) ==
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE))
break;
rte_delay_us(1);
}
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index 880013a515..892069372f 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -41,7 +41,7 @@
/* Base address of the HW descriptor ring should be 128B aligned. */
#define I40E_RING_BASE_ALIGN 128
-#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
+#define I40E_TXD_CMD (CI_TX_DESC_CMD_EOP | CI_TX_DESC_CMD_RS)
#ifdef RTE_LIBRTE_IEEE1588
#define I40E_TX_IEEE1588_TMST RTE_MBUF_F_TX_IEEE1588_TMST
@@ -256,7 +256,7 @@ i40e_rxd_build_fdir(volatile union ci_rx_desc *rxdp, struct rte_mbuf *mb)
static inline void
i40e_parse_tunneling_params(uint64_t ol_flags,
- union i40e_tx_offload tx_offload,
+ union ci_tx_offload tx_offload,
uint32_t *cd_tunneling)
{
/* EIPT: External (outer) IP header type */
@@ -315,51 +315,51 @@ static inline void
i40e_txd_enable_checksum(uint64_t ol_flags,
uint32_t *td_cmd,
uint32_t *td_offset,
- union i40e_tx_offload tx_offload)
+ union ci_tx_offload tx_offload)
{
/* Set MACLEN */
if (!(ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK))
*td_offset |= (tx_offload.l2_len >> 1)
- << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
+ << CI_TX_DESC_LEN_MACLEN_S;
/* Enable L3 checksum offloads */
if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
- *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
+ *td_cmd |= CI_TX_DESC_CMD_IIPT_IPV4_CSUM;
*td_offset |= (tx_offload.l3_len >> 2)
- << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+ << CI_TX_DESC_LEN_IPLEN_S;
} else if (ol_flags & RTE_MBUF_F_TX_IPV4) {
- *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
+ *td_cmd |= CI_TX_DESC_CMD_IIPT_IPV4;
*td_offset |= (tx_offload.l3_len >> 2)
- << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+ << CI_TX_DESC_LEN_IPLEN_S;
} else if (ol_flags & RTE_MBUF_F_TX_IPV6) {
- *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
+ *td_cmd |= CI_TX_DESC_CMD_IIPT_IPV6;
*td_offset |= (tx_offload.l3_len >> 2)
- << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+ << CI_TX_DESC_LEN_IPLEN_S;
}
if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
- *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
+ *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_TCP;
*td_offset |= (tx_offload.l4_len >> 2)
- << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+ << CI_TX_DESC_LEN_L4_LEN_S;
return;
}
/* Enable L4 checksum offloads */
switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
case RTE_MBUF_F_TX_TCP_CKSUM:
- *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
+ *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_TCP;
*td_offset |= (sizeof(struct rte_tcp_hdr) >> 2) <<
- I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+ CI_TX_DESC_LEN_L4_LEN_S;
break;
case RTE_MBUF_F_TX_SCTP_CKSUM:
- *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
+ *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_SCTP;
*td_offset |= (sizeof(struct rte_sctp_hdr) >> 2) <<
- I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+ CI_TX_DESC_LEN_L4_LEN_S;
break;
case RTE_MBUF_F_TX_UDP_CKSUM:
- *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
+ *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_UDP;
*td_offset |= (sizeof(struct rte_udp_hdr) >> 2) <<
- I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+ CI_TX_DESC_LEN_L4_LEN_S;
break;
default:
break;
@@ -373,11 +373,11 @@ i40e_build_ctob(uint32_t td_cmd,
unsigned int size,
uint32_t td_tag)
{
- return rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DATA |
- ((uint64_t)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
- ((uint64_t)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
- ((uint64_t)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
- ((uint64_t)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
+ return rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
+ ((uint64_t)size << CI_TXD_QW1_TX_BUF_SZ_S) |
+ ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
}
static inline int
@@ -1000,7 +1000,7 @@ i40e_calc_context_desc(uint64_t flags)
/* set i40e TSO context descriptor */
static inline uint64_t
-i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
+i40e_set_tso_ctx(struct rte_mbuf *mbuf, union ci_tx_offload tx_offload)
{
uint64_t ctx_desc = 0;
uint32_t cd_cmd, hdr_len, cd_tso_len;
@@ -1025,9 +1025,6 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
return ctx_desc;
}
-/* HW requires that Tx buffer size ranges from 1B up to (16K-1)B. */
-#define I40E_MAX_DATA_PER_TXD \
- (I40E_TXD_QW1_TX_BUF_SZ_MASK >> I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
/* Calculate the number of TX descriptors needed for each pkt */
static inline uint16_t
i40e_calc_pkt_desc(struct rte_mbuf *tx_pkt)
@@ -1036,7 +1033,7 @@ i40e_calc_pkt_desc(struct rte_mbuf *tx_pkt)
uint16_t count = 0;
while (txd != NULL) {
- count += DIV_ROUND_UP(txd->data_len, I40E_MAX_DATA_PER_TXD);
+ count += DIV_ROUND_UP(txd->data_len, CI_MAX_DATA_PER_TXD);
txd = txd->next;
}
@@ -1065,7 +1062,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
uint16_t tx_last;
uint16_t slen;
uint64_t buf_dma_addr;
- union i40e_tx_offload tx_offload = {0};
+ union ci_tx_offload tx_offload = {0};
txq = tx_queue;
sw_ring = txq->sw_ring;
@@ -1134,18 +1131,18 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
/* Descriptor based VLAN insertion */
if (ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
- td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
+ td_cmd |= CI_TX_DESC_CMD_IL2TAG1;
td_tag = tx_pkt->vlan_tci;
}
/* Always enable CRC offload insertion */
- td_cmd |= I40E_TX_DESC_CMD_ICRC;
+ td_cmd |= CI_TX_DESC_CMD_ICRC;
/* Fill in tunneling parameters if necessary */
cd_tunneling_params = 0;
if (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
td_offset |= (tx_offload.outer_l2_len >> 1)
- << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
+ << CI_TX_DESC_LEN_MACLEN_S;
i40e_parse_tunneling_params(ol_flags, tx_offload,
&cd_tunneling_params);
}
@@ -1225,16 +1222,16 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
buf_dma_addr = rte_mbuf_data_iova(m_seg);
while ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) &&
- unlikely(slen > I40E_MAX_DATA_PER_TXD)) {
+ unlikely(slen > CI_MAX_DATA_PER_TXD)) {
txd->buffer_addr =
rte_cpu_to_le_64(buf_dma_addr);
txd->cmd_type_offset_bsz =
i40e_build_ctob(td_cmd,
- td_offset, I40E_MAX_DATA_PER_TXD,
+ td_offset, CI_MAX_DATA_PER_TXD,
td_tag);
- buf_dma_addr += I40E_MAX_DATA_PER_TXD;
- slen -= I40E_MAX_DATA_PER_TXD;
+ buf_dma_addr += CI_MAX_DATA_PER_TXD;
+ slen -= CI_MAX_DATA_PER_TXD;
txe->last_id = tx_last;
tx_id = txe->next_id;
@@ -1261,7 +1258,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
} while (m_seg != NULL);
/* The last packet data descriptor needs End Of Packet (EOP) */
- td_cmd |= I40E_TX_DESC_CMD_EOP;
+ td_cmd |= CI_TX_DESC_CMD_EOP;
txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
@@ -1271,15 +1268,14 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
"%4u (port=%d queue=%d)",
tx_last, txq->port_id, txq->queue_id);
- td_cmd |= I40E_TX_DESC_CMD_RS;
+ td_cmd |= CI_TX_DESC_CMD_RS;
/* Update txq RS bit counters */
txq->nb_tx_used = 0;
}
txd->cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)td_cmd) <<
- I40E_TXD_QW1_CMD_SHIFT);
+ rte_cpu_to_le_64(((uint64_t)td_cmd) << CI_TXD_QW1_CMD_S);
}
end_of_tx:
@@ -1305,8 +1301,8 @@ i40e_tx_free_bufs(struct ci_tx_queue *txq)
const uint16_t m = tx_rs_thresh % I40E_TX_MAX_FREE_BUF_SZ;
if ((txq->ci_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
- rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
- rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+ rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE))
return 0;
txep = &txq->sw_ring[txq->tx_next_dd - (tx_rs_thresh - 1)];
@@ -1432,8 +1428,7 @@ tx_xmit_pkts(struct ci_tx_queue *txq,
n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
i40e_tx_fill_hw_ring(txq, tx_pkts, n);
txr[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
- I40E_TXD_QW1_CMD_SHIFT);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
txq->tx_tail = 0;
}
@@ -1445,8 +1440,7 @@ tx_xmit_pkts(struct ci_tx_queue *txq,
/* Determine if RS bit needs to be set */
if (txq->tx_tail > txq->tx_next_rs) {
txr[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
- I40E_TXD_QW1_CMD_SHIFT);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
if (txq->tx_next_rs >= txq->nb_tx_desc)
@@ -2371,9 +2365,9 @@ i40e_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
}
status = &txq->ci_tx_ring[desc].cmd_type_offset_bsz;
- mask = rte_le_to_cpu_64(I40E_TXD_QW1_DTYPE_MASK);
+ mask = rte_le_to_cpu_64(CI_TXD_QW1_DTYPE_M);
expect = rte_cpu_to_le_64(
- I40E_TX_DESC_DTYPE_DESC_DONE << I40E_TXD_QW1_DTYPE_SHIFT);
+ CI_TX_DESC_DTYPE_DESC_DONE << CI_TXD_QW1_DTYPE_S);
if ((*status & mask) == expect)
return RTE_ETH_TX_DESC_DONE;
@@ -2869,7 +2863,7 @@ i40e_reset_tx_queue(struct ci_tx_queue *txq)
volatile struct ci_tx_desc *txd = &txq->ci_tx_ring[i];
txd->cmd_type_offset_bsz =
- rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE);
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE);
txe[i].mbuf = NULL;
txe[i].last_id = i;
txe[prev].next_id = i;
diff --git a/drivers/net/intel/i40e/i40e_rxtx.h b/drivers/net/intel/i40e/i40e_rxtx.h
index ed173d8f17..307ffa3049 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.h
+++ b/drivers/net/intel/i40e/i40e_rxtx.h
@@ -47,8 +47,8 @@
#define I40E_RX_DESC_EXT_STATUS_FLEXBL_MASK 0x03
#define I40E_RX_DESC_EXT_STATUS_FLEXBL_FLEX 0x01
-#define I40E_TD_CMD (I40E_TX_DESC_CMD_ICRC |\
- I40E_TX_DESC_CMD_EOP)
+#define I40E_TD_CMD (CI_TX_DESC_CMD_ICRC |\
+ CI_TX_DESC_CMD_EOP)
enum i40e_header_split_mode {
i40e_header_split_none = 0,
@@ -110,19 +110,6 @@ enum i40e_header_split_mode {
#define I40E_TX_VECTOR_OFFLOADS RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE
-/** Offload features */
-union i40e_tx_offload {
- uint64_t data;
- struct {
- uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
- uint64_t l3_len:9; /**< L3 (IP) Header Length. */
- uint64_t l4_len:8; /**< L4 Header Length. */
- uint64_t tso_segsz:16; /**< TCP TSO segment size */
- uint64_t outer_l2_len:8; /**< outer L2 Header Length */
- uint64_t outer_l3_len:16; /**< outer L3 Header Length */
- };
-};
-
int i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id);
int i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id);
int i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id);
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
index 81e9e2bc0b..9196916a04 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_altivec.c
@@ -449,9 +449,9 @@ static inline void
vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
{
- uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) |
- ((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));
+ uint64_t high_qw = (CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
__vector unsigned long descriptor = (__vector unsigned long){
pkt->buf_iova + pkt->data_off, high_qw};
@@ -477,7 +477,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
- uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
+ uint64_t rs = CI_TX_DESC_CMD_RS | I40E_TD_CMD;
int i;
if (txq->nb_tx_free < txq->tx_free_thresh)
@@ -520,8 +520,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
- I40E_TXD_QW1_CMD_SHIFT);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
}
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
index b25b05d79d..012283d3ca 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx2.c
@@ -684,9 +684,9 @@ static inline void
vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
{
- uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) |
- ((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));
+ uint64_t high_qw = (CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
__m128i descriptor = _mm_set_epi64x(high_qw,
pkt->buf_iova + pkt->data_off);
@@ -697,8 +697,7 @@ static inline void
vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
- const uint64_t hi_qw_tmpl = (I40E_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT));
+ const uint64_t hi_qw_tmpl = (CI_TX_DESC_DTYPE_DATA | ((uint64_t)flags << CI_TXD_QW1_CMD_S));
/* if unaligned on 32-bit boundary, do one to align */
if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
@@ -709,13 +708,13 @@ vtx(volatile struct ci_tx_desc *txdp,
/* do two at a time while possible, in bursts */
for (; nb_pkts > 3; txdp += 4, pkt += 4, nb_pkts -= 4) {
uint64_t hi_qw3 = hi_qw_tmpl |
- ((uint64_t)pkt[3]->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT);
+ ((uint64_t)pkt[3]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
uint64_t hi_qw2 = hi_qw_tmpl |
- ((uint64_t)pkt[2]->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT);
+ ((uint64_t)pkt[2]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
uint64_t hi_qw1 = hi_qw_tmpl |
- ((uint64_t)pkt[1]->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT);
+ ((uint64_t)pkt[1]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
uint64_t hi_qw0 = hi_qw_tmpl |
- ((uint64_t)pkt[0]->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT);
+ ((uint64_t)pkt[0]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
__m256i desc2_3 = _mm256_set_epi64x(
hi_qw3, pkt[3]->buf_iova + pkt[3]->data_off,
@@ -743,7 +742,7 @@ i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
- uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
+ uint64_t rs = CI_TX_DESC_CMD_RS | I40E_TD_CMD;
if (txq->nb_tx_free < txq->tx_free_thresh)
ci_tx_free_bufs_vec(txq, i40e_tx_desc_done, false);
@@ -785,8 +784,7 @@ i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
- I40E_TXD_QW1_CMD_SHIFT);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
}
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
index 9a967faeee..def03e14e3 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_avx512.c
@@ -752,9 +752,9 @@ i40e_recv_scattered_pkts_vec_avx512(void *rx_queue,
static inline void
vtx1(volatile struct ci_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
{
- uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) |
- ((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));
+ uint64_t high_qw = (CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
__m128i descriptor = _mm_set_epi64x(high_qw,
pkt->buf_iova + pkt->data_off);
@@ -765,26 +765,17 @@ static inline void
vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
- const uint64_t hi_qw_tmpl = (I40E_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT));
+ const uint64_t hi_qw_tmpl = (CI_TX_DESC_DTYPE_DATA | ((uint64_t)flags << CI_TXD_QW1_CMD_S));
for (; nb_pkts > 3; txdp += 4, pkt += 4, nb_pkts -= 4) {
- uint64_t hi_qw3 =
- hi_qw_tmpl |
- ((uint64_t)pkt[3]->data_len <<
- I40E_TXD_QW1_TX_BUF_SZ_SHIFT);
- uint64_t hi_qw2 =
- hi_qw_tmpl |
- ((uint64_t)pkt[2]->data_len <<
- I40E_TXD_QW1_TX_BUF_SZ_SHIFT);
- uint64_t hi_qw1 =
- hi_qw_tmpl |
- ((uint64_t)pkt[1]->data_len <<
- I40E_TXD_QW1_TX_BUF_SZ_SHIFT);
- uint64_t hi_qw0 =
- hi_qw_tmpl |
- ((uint64_t)pkt[0]->data_len <<
- I40E_TXD_QW1_TX_BUF_SZ_SHIFT);
+ uint64_t hi_qw3 = hi_qw_tmpl |
+ ((uint64_t)pkt[3]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw2 = hi_qw_tmpl |
+ ((uint64_t)pkt[2]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw1 = hi_qw_tmpl |
+ ((uint64_t)pkt[1]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw0 = hi_qw_tmpl |
+ ((uint64_t)pkt[0]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
__m512i desc0_3 =
_mm512_set_epi64
@@ -811,7 +802,7 @@ i40e_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
- uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
+ uint64_t rs = CI_TX_DESC_CMD_RS | I40E_TD_CMD;
if (txq->nb_tx_free < txq->tx_free_thresh)
ci_tx_free_bufs_vec(txq, i40e_tx_desc_done, false);
@@ -854,8 +845,7 @@ i40e_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
- I40E_TXD_QW1_CMD_SHIFT);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
}
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_common.h b/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
index 1fd7fc75bf..292a39501e 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_common.h
@@ -16,8 +16,8 @@ static inline int
i40e_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
{
return (txq->ci_tx_ring[idx].cmd_type_offset_bsz &
- rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
- rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE);
+ rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) ==
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE);
}
static inline void
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
index 0b95152232..839e53e93e 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_neon.c
@@ -600,9 +600,9 @@ static inline void
vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
{
- uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) |
- ((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));
+ uint64_t high_qw = (CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
uint64x2_t descriptor = {pkt->buf_iova + pkt->data_off, high_qw};
vst1q_u64(RTE_CAST_PTR(uint64_t *, txdp), descriptor);
@@ -627,7 +627,7 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
- uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
+ uint64_t rs = CI_TX_DESC_CMD_RS | I40E_TD_CMD;
int i;
if (txq->nb_tx_free < txq->tx_free_thresh)
@@ -669,8 +669,7 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
- I40E_TXD_QW1_CMD_SHIFT);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
}
diff --git a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
index 2a3baa415e..6b9a291173 100644
--- a/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/intel/i40e/i40e_rxtx_vec_sse.c
@@ -607,9 +607,9 @@ static inline void
vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
{
- uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) |
- ((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));
+ uint64_t high_qw = (CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
__m128i descriptor = _mm_set_epi64x(high_qw,
pkt->buf_iova + pkt->data_off);
@@ -635,7 +635,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = I40E_TD_CMD;
- uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
+ uint64_t rs = CI_TX_DESC_CMD_RS | I40E_TD_CMD;
int i;
if (txq->nb_tx_free < txq->tx_free_thresh)
@@ -677,8 +677,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
- I40E_TXD_QW1_CMD_SHIFT);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
}
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index 4605523673..9946e112e8 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -284,7 +284,7 @@ reset_tx_queue(struct ci_tx_queue *txq)
prev = (uint16_t)(txq->nb_tx_desc - 1);
for (i = 0; i < txq->nb_tx_desc; i++) {
txq->ci_tx_ring[i].cmd_type_offset_bsz =
- rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE);
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE);
txe[i].mbuf = NULL;
txe[i].last_id = i;
txe[prev].next_id = i;
@@ -2352,12 +2352,12 @@ iavf_fill_ctx_desc_cmd_field(volatile uint64_t *field, struct rte_mbuf *m,
/* TSO enabled */
if (m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG))
- cmd = IAVF_TX_CTX_DESC_TSO << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+ cmd = CI_TX_CTX_DESC_TSO << IAVF_TXD_CTX_QW1_CMD_SHIFT;
if ((m->ol_flags & RTE_MBUF_F_TX_VLAN &&
vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) ||
m->ol_flags & RTE_MBUF_F_TX_QINQ) {
- cmd |= IAVF_TX_CTX_DESC_IL2TAG2
+ cmd |= CI_TX_CTX_DESC_IL2TAG2
<< IAVF_TXD_CTX_QW1_CMD_SHIFT;
}
@@ -2578,20 +2578,20 @@ iavf_build_data_desc_cmd_offset_fields(volatile uint64_t *qw1,
uint64_t offset = 0;
uint64_t l2tag1 = 0;
- *qw1 = IAVF_TX_DESC_DTYPE_DATA;
+ *qw1 = CI_TX_DESC_DTYPE_DATA;
- command = (uint64_t)IAVF_TX_DESC_CMD_ICRC;
+ command = (uint64_t)CI_TX_DESC_CMD_ICRC;
/* Descriptor based VLAN insertion */
if ((vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1) &&
m->ol_flags & RTE_MBUF_F_TX_VLAN) {
- command |= (uint64_t)IAVF_TX_DESC_CMD_IL2TAG1;
+ command |= (uint64_t)CI_TX_DESC_CMD_IL2TAG1;
l2tag1 |= m->vlan_tci;
}
/* Descriptor based QinQ insertion. vlan_flag specifies outer tag location. */
if (m->ol_flags & RTE_MBUF_F_TX_QINQ) {
- command |= (uint64_t)IAVF_TX_DESC_CMD_IL2TAG1;
+ command |= (uint64_t)CI_TX_DESC_CMD_IL2TAG1;
l2tag1 = vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1 ? m->vlan_tci_outer :
m->vlan_tci;
}
@@ -2604,32 +2604,32 @@ iavf_build_data_desc_cmd_offset_fields(volatile uint64_t *qw1,
if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK &&
!(m->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD))
offset |= (m->outer_l2_len >> 1)
- << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
+ << CI_TX_DESC_LEN_MACLEN_S;
else
offset |= (m->l2_len >> 1)
- << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
+ << CI_TX_DESC_LEN_MACLEN_S;
/* Enable L3 checksum offloading inner */
if (m->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
if (m->ol_flags & RTE_MBUF_F_TX_IPV4) {
- command |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
- offset |= (m->l3_len >> 2) << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+ command |= CI_TX_DESC_CMD_IIPT_IPV4_CSUM;
+ offset |= (m->l3_len >> 2) << CI_TX_DESC_LEN_IPLEN_S;
}
} else if (m->ol_flags & RTE_MBUF_F_TX_IPV4) {
- command |= IAVF_TX_DESC_CMD_IIPT_IPV4;
- offset |= (m->l3_len >> 2) << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+ command |= CI_TX_DESC_CMD_IIPT_IPV4;
+ offset |= (m->l3_len >> 2) << CI_TX_DESC_LEN_IPLEN_S;
} else if (m->ol_flags & RTE_MBUF_F_TX_IPV6) {
- command |= IAVF_TX_DESC_CMD_IIPT_IPV6;
- offset |= (m->l3_len >> 2) << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+ command |= CI_TX_DESC_CMD_IIPT_IPV6;
+ offset |= (m->l3_len >> 2) << CI_TX_DESC_LEN_IPLEN_S;
}
if (m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) {
if (m->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
- command |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP;
+ command |= CI_TX_DESC_CMD_L4T_EOFT_TCP;
else
- command |= IAVF_TX_DESC_CMD_L4T_EOFT_UDP;
+ command |= CI_TX_DESC_CMD_L4T_EOFT_UDP;
offset |= (m->l4_len >> 2) <<
- IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+ CI_TX_DESC_LEN_L4_LEN_S;
*qw1 = rte_cpu_to_le_64((((uint64_t)command <<
IAVF_TXD_DATA_QW1_CMD_SHIFT) & IAVF_TXD_DATA_QW1_CMD_MASK) |
@@ -2643,19 +2643,19 @@ iavf_build_data_desc_cmd_offset_fields(volatile uint64_t *qw1,
/* Enable L4 checksum offloads */
switch (m->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
case RTE_MBUF_F_TX_TCP_CKSUM:
- command |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP;
+ command |= CI_TX_DESC_CMD_L4T_EOFT_TCP;
offset |= (sizeof(struct rte_tcp_hdr) >> 2) <<
- IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+ CI_TX_DESC_LEN_L4_LEN_S;
break;
case RTE_MBUF_F_TX_SCTP_CKSUM:
- command |= IAVF_TX_DESC_CMD_L4T_EOFT_SCTP;
+ command |= CI_TX_DESC_CMD_L4T_EOFT_SCTP;
offset |= (sizeof(struct rte_sctp_hdr) >> 2) <<
- IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+ CI_TX_DESC_LEN_L4_LEN_S;
break;
case RTE_MBUF_F_TX_UDP_CKSUM:
- command |= IAVF_TX_DESC_CMD_L4T_EOFT_UDP;
+ command |= CI_TX_DESC_CMD_L4T_EOFT_UDP;
offset |= (sizeof(struct rte_udp_hdr) >> 2) <<
- IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+ CI_TX_DESC_LEN_L4_LEN_S;
break;
}
@@ -2675,8 +2675,7 @@ iavf_calc_pkt_desc(struct rte_mbuf *tx_pkt)
uint16_t count = 0;
while (txd != NULL) {
- count += (txd->data_len + IAVF_MAX_DATA_PER_TXD - 1) /
- IAVF_MAX_DATA_PER_TXD;
+ count += (txd->data_len + CI_MAX_DATA_PER_TXD - 1) / CI_MAX_DATA_PER_TXD;
txd = txd->next;
}
@@ -2882,14 +2881,14 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
buf_dma_addr = rte_mbuf_data_iova(mb_seg);
while ((mb_seg->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
RTE_MBUF_F_TX_UDP_SEG)) &&
- unlikely(slen > IAVF_MAX_DATA_PER_TXD)) {
+ unlikely(slen > CI_MAX_DATA_PER_TXD)) {
iavf_fill_data_desc(ddesc, ddesc_template,
- IAVF_MAX_DATA_PER_TXD, buf_dma_addr);
+ CI_MAX_DATA_PER_TXD, buf_dma_addr);
IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx);
- buf_dma_addr += IAVF_MAX_DATA_PER_TXD;
- slen -= IAVF_MAX_DATA_PER_TXD;
+ buf_dma_addr += CI_MAX_DATA_PER_TXD;
+ slen -= CI_MAX_DATA_PER_TXD;
txe->last_id = desc_idx_last;
desc_idx = txe->next_id;
@@ -2910,7 +2909,7 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
} while (mb_seg);
/* The last packet data descriptor needs End Of Packet (EOP) */
- ddesc_cmd = IAVF_TX_DESC_CMD_EOP;
+ ddesc_cmd = CI_TX_DESC_CMD_EOP;
txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_desc_required);
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_desc_required);
@@ -2920,7 +2919,7 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
"%4u (port=%d queue=%d)",
desc_idx_last, txq->port_id, txq->queue_id);
- ddesc_cmd |= IAVF_TX_DESC_CMD_RS;
+ ddesc_cmd |= CI_TX_DESC_CMD_RS;
/* Update txq RS bit counters */
txq->nb_tx_used = 0;
@@ -4465,9 +4464,8 @@ iavf_dev_tx_desc_status(void *tx_queue, uint16_t offset)
}
status = &txq->ci_tx_ring[desc].cmd_type_offset_bsz;
- mask = rte_le_to_cpu_64(IAVF_TXD_QW1_DTYPE_MASK);
- expect = rte_cpu_to_le_64(
- IAVF_TX_DESC_DTYPE_DESC_DONE << IAVF_TXD_QW1_DTYPE_SHIFT);
+ mask = rte_le_to_cpu_64(CI_TXD_QW1_DTYPE_M);
+ expect = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE << CI_TXD_QW1_DTYPE_S);
if ((*status & mask) == expect)
return RTE_ETH_TX_DESC_DONE;
diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index 14580c5b8b..86281aa965 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -162,10 +162,6 @@
#define IAVF_TX_OFFLOAD_NOTSUP_MASK \
(RTE_MBUF_F_TX_OFFLOAD_MASK ^ IAVF_TX_OFFLOAD_MASK)
-/* HW requires that TX buffer size ranges from 1B up to (16K-1)B. */
-#define IAVF_MAX_DATA_PER_TXD \
- (IAVF_TXD_QW1_TX_BUF_SZ_MASK >> IAVF_TXD_QW1_TX_BUF_SZ_SHIFT)
-
#define IAVF_TX_LLDP_DYNFIELD "intel_pmd_dynfield_tx_lldp"
#define IAVF_CHECK_TX_LLDP(m) \
((rte_pmd_iavf_tx_lldp_dynfield_offset > 0) && \
@@ -195,18 +191,6 @@ struct iavf_rx_queue_stats {
struct iavf_ipsec_crypto_stats ipsec_crypto;
};
-/* Offload features */
-union iavf_tx_offload {
- uint64_t data;
- struct {
- uint64_t l2_len:7; /* L2 (MAC) Header Length. */
- uint64_t l3_len:9; /* L3 (IP) Header Length. */
- uint64_t l4_len:8; /* L4 Header Length. */
- uint64_t tso_segsz:16; /* TCP TSO segment size */
- /* uint64_t unused : 24; */
- };
-};
-
/* Rx Flex Descriptor
* RxDID Profile ID 16-21
* Flex-field 0: RSS hash lower 16-bits
@@ -410,7 +394,7 @@ enum iavf_rx_flex_desc_ipsec_crypto_status {
#define IAVF_TXD_DATA_QW1_DTYPE_SHIFT (0)
-#define IAVF_TXD_DATA_QW1_DTYPE_MASK (0xFUL << IAVF_TXD_QW1_DTYPE_SHIFT)
+#define IAVF_TXD_DATA_QW1_DTYPE_MASK (0xFUL << CI_TXD_QW1_DTYPE_S)
#define IAVF_TXD_DATA_QW1_CMD_SHIFT (4)
#define IAVF_TXD_DATA_QW1_CMD_MASK (0x3FFUL << IAVF_TXD_DATA_QW1_CMD_SHIFT)
@@ -689,7 +673,7 @@ void iavf_dump_tx_descriptor(const struct ci_tx_queue *txq,
rte_le_to_cpu_64(tx_desc->cmd_type_offset_bsz &
rte_cpu_to_le_64(IAVF_TXD_DATA_QW1_DTYPE_MASK));
switch (type) {
- case IAVF_TX_DESC_DTYPE_DATA:
+ case CI_TX_DESC_DTYPE_DATA:
name = "Tx_data_desc";
break;
case IAVF_TX_DESC_DTYPE_CONTEXT:
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
index 82861b8398..e92a84a51a 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx2.c
@@ -1633,10 +1633,9 @@ static __rte_always_inline void
iavf_vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags, bool offload, uint8_t vlan_flag)
{
- uint64_t high_qw =
- (IAVF_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << IAVF_TXD_QW1_CMD_SHIFT) |
- ((uint64_t)pkt->data_len << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT));
+ uint64_t high_qw = (CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
if (offload)
iavf_txd_enable_offload(pkt, &high_qw, vlan_flag);
@@ -1649,8 +1648,7 @@ static __rte_always_inline void
iavf_vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags, bool offload, uint8_t vlan_flag)
{
- const uint64_t hi_qw_tmpl = (IAVF_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << IAVF_TXD_QW1_CMD_SHIFT));
+ const uint64_t hi_qw_tmpl = (CI_TX_DESC_DTYPE_DATA | ((uint64_t)flags << CI_TXD_QW1_CMD_S));
/* if unaligned on 32-bit boundary, do one to align */
if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
@@ -1660,28 +1658,20 @@ iavf_vtx(volatile struct ci_tx_desc *txdp,
/* do two at a time while possible, in bursts */
for (; nb_pkts > 3; txdp += 4, pkt += 4, nb_pkts -= 4) {
- uint64_t hi_qw3 =
- hi_qw_tmpl |
- ((uint64_t)pkt[3]->data_len <<
- IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
+ uint64_t hi_qw3 = hi_qw_tmpl |
+ ((uint64_t)pkt[3]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
if (offload)
iavf_txd_enable_offload(pkt[3], &hi_qw3, vlan_flag);
- uint64_t hi_qw2 =
- hi_qw_tmpl |
- ((uint64_t)pkt[2]->data_len <<
- IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
+ uint64_t hi_qw2 = hi_qw_tmpl |
+ ((uint64_t)pkt[2]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
if (offload)
iavf_txd_enable_offload(pkt[2], &hi_qw2, vlan_flag);
- uint64_t hi_qw1 =
- hi_qw_tmpl |
- ((uint64_t)pkt[1]->data_len <<
- IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
+ uint64_t hi_qw1 = hi_qw_tmpl |
+ ((uint64_t)pkt[1]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
if (offload)
iavf_txd_enable_offload(pkt[1], &hi_qw1, vlan_flag);
- uint64_t hi_qw0 =
- hi_qw_tmpl |
- ((uint64_t)pkt[0]->data_len <<
- IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
+ uint64_t hi_qw0 = hi_qw_tmpl |
+ ((uint64_t)pkt[0]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
if (offload)
iavf_txd_enable_offload(pkt[0], &hi_qw0, vlan_flag);
@@ -1717,8 +1707,8 @@ iavf_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
/* bit2 is reserved and must be set to 1 according to Spec */
- uint64_t flags = IAVF_TX_DESC_CMD_EOP | IAVF_TX_DESC_CMD_ICRC;
- uint64_t rs = IAVF_TX_DESC_CMD_RS | flags;
+ uint64_t flags = CI_TX_DESC_CMD_EOP | CI_TX_DESC_CMD_ICRC;
+ uint64_t rs = CI_TX_DESC_CMD_RS | flags;
if (txq->nb_tx_free < txq->tx_free_thresh)
ci_tx_free_bufs_vec(txq, iavf_tx_desc_done, false);
@@ -1761,8 +1751,7 @@ iavf_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)IAVF_TX_DESC_CMD_RS) <<
- IAVF_TXD_QW1_CMD_SHIFT);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
}
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
index ad1b0b90cd..ff9d3c009a 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_avx512.c
@@ -1844,10 +1844,9 @@ iavf_vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags,
bool offload, uint8_t vlan_flag)
{
- uint64_t high_qw =
- (IAVF_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << IAVF_TXD_QW1_CMD_SHIFT) |
- ((uint64_t)pkt->data_len << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT));
+ uint64_t high_qw = (CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
if (offload)
iavf_txd_enable_offload(pkt, &high_qw, vlan_flag);
@@ -1863,8 +1862,7 @@ iavf_vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags,
bool offload, uint8_t vlan_flag)
{
- const uint64_t hi_qw_tmpl = (IAVF_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << IAVF_TXD_QW1_CMD_SHIFT));
+ const uint64_t hi_qw_tmpl = (CI_TX_DESC_DTYPE_DATA | (flags << CI_TXD_QW1_CMD_S));
/* if unaligned on 32-bit boundary, do one to align */
if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
@@ -1874,22 +1872,14 @@ iavf_vtx(volatile struct ci_tx_desc *txdp,
/* do 4 at a time while possible, in bursts */
for (; nb_pkts > 3; txdp += 4, pkt += 4, nb_pkts -= 4) {
- uint64_t hi_qw3 =
- hi_qw_tmpl |
- ((uint64_t)pkt[3]->data_len <<
- IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
- uint64_t hi_qw2 =
- hi_qw_tmpl |
- ((uint64_t)pkt[2]->data_len <<
- IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
- uint64_t hi_qw1 =
- hi_qw_tmpl |
- ((uint64_t)pkt[1]->data_len <<
- IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
- uint64_t hi_qw0 =
- hi_qw_tmpl |
- ((uint64_t)pkt[0]->data_len <<
- IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
+ uint64_t hi_qw3 = hi_qw_tmpl |
+ ((uint64_t)pkt[3]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw2 = hi_qw_tmpl |
+ ((uint64_t)pkt[2]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw1 = hi_qw_tmpl |
+ ((uint64_t)pkt[1]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw0 = hi_qw_tmpl |
+ ((uint64_t)pkt[0]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
if (offload) {
iavf_txd_enable_offload(pkt[3], &hi_qw3, vlan_flag);
iavf_txd_enable_offload(pkt[2], &hi_qw2, vlan_flag);
@@ -2093,9 +2083,9 @@ ctx_vtx1(volatile struct ci_tx_desc *txdp, struct rte_mbuf *pkt,
if (IAVF_CHECK_TX_LLDP(pkt))
high_ctx_qw |= IAVF_TX_CTX_DESC_SWTCH_UPLINK
<< IAVF_TXD_CTX_QW1_CMD_SHIFT;
- uint64_t high_data_qw = (IAVF_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << IAVF_TXD_QW1_CMD_SHIFT) |
- ((uint64_t)pkt->data_len << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT));
+ uint64_t high_data_qw = (CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
if (offload)
iavf_txd_enable_offload(pkt, &high_data_qw, vlan_flag);
@@ -2110,8 +2100,7 @@ ctx_vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags,
bool offload, uint8_t vlan_flag)
{
- uint64_t hi_data_qw_tmpl = (IAVF_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << IAVF_TXD_QW1_CMD_SHIFT));
+ uint64_t hi_data_qw_tmpl = (CI_TX_DESC_DTYPE_DATA | (flags << CI_TXD_QW1_CMD_S));
/* if unaligned on 32-bit boundary, do one to align */
if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
@@ -2128,11 +2117,9 @@ ctx_vtx(volatile struct ci_tx_desc *txdp,
uint64_t hi_data_qw0 = 0;
hi_data_qw1 = hi_data_qw_tmpl |
- ((uint64_t)pkt[1]->data_len <<
- IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
+ ((uint64_t)pkt[1]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
hi_data_qw0 = hi_data_qw_tmpl |
- ((uint64_t)pkt[0]->data_len <<
- IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
+ ((uint64_t)pkt[0]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
#ifdef IAVF_TX_VLAN_QINQ_OFFLOAD
if (offload) {
@@ -2140,13 +2127,11 @@ ctx_vtx(volatile struct ci_tx_desc *txdp,
uint64_t qinq_tag = vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2 ?
(uint64_t)pkt[1]->vlan_tci :
(uint64_t)pkt[1]->vlan_tci_outer;
- hi_ctx_qw1 |= IAVF_TX_CTX_DESC_IL2TAG2 <<
- IAVF_TXD_CTX_QW1_CMD_SHIFT;
+ hi_ctx_qw1 |= CI_TX_CTX_DESC_IL2TAG2 << CI_TXD_QW1_CMD_S;
low_ctx_qw1 |= qinq_tag << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
} else if (pkt[1]->ol_flags & RTE_MBUF_F_TX_VLAN &&
vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
- hi_ctx_qw1 |=
- IAVF_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+ hi_ctx_qw1 |= CI_TX_CTX_DESC_IL2TAG2 << CI_TXD_QW1_CMD_S;
low_ctx_qw1 |=
(uint64_t)pkt[1]->vlan_tci << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
}
@@ -2154,7 +2139,7 @@ ctx_vtx(volatile struct ci_tx_desc *txdp,
#endif
if (IAVF_CHECK_TX_LLDP(pkt[1]))
hi_ctx_qw1 |= IAVF_TX_CTX_DESC_SWTCH_UPLINK
- << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+ << CI_TXD_QW1_CMD_S;
#ifdef IAVF_TX_VLAN_QINQ_OFFLOAD
if (offload) {
@@ -2162,21 +2147,18 @@ ctx_vtx(volatile struct ci_tx_desc *txdp,
uint64_t qinq_tag = vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2 ?
(uint64_t)pkt[0]->vlan_tci :
(uint64_t)pkt[0]->vlan_tci_outer;
- hi_ctx_qw0 |= IAVF_TX_CTX_DESC_IL2TAG2 <<
- IAVF_TXD_CTX_QW1_CMD_SHIFT;
+ hi_ctx_qw0 |= CI_TX_CTX_DESC_IL2TAG2 << CI_TXD_QW1_CMD_S;
low_ctx_qw0 |= qinq_tag << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
} else if (pkt[0]->ol_flags & RTE_MBUF_F_TX_VLAN &&
vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
- hi_ctx_qw0 |=
- IAVF_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+ hi_ctx_qw0 |= CI_TX_CTX_DESC_IL2TAG2 << CI_TXD_QW1_CMD_S;
low_ctx_qw0 |=
(uint64_t)pkt[0]->vlan_tci << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
}
}
#endif
if (IAVF_CHECK_TX_LLDP(pkt[0]))
- hi_ctx_qw0 |= IAVF_TX_CTX_DESC_SWTCH_UPLINK
- << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+ hi_ctx_qw0 |= IAVF_TX_CTX_DESC_SWTCH_UPLINK << CI_TXD_QW1_CMD_S;
if (offload) {
iavf_txd_enable_offload(pkt[1], &hi_data_qw1, vlan_flag);
@@ -2207,8 +2189,8 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
/* bit2 is reserved and must be set to 1 according to Spec */
- uint64_t flags = IAVF_TX_DESC_CMD_EOP | IAVF_TX_DESC_CMD_ICRC;
- uint64_t rs = IAVF_TX_DESC_CMD_RS | flags;
+ uint64_t flags = CI_TX_DESC_CMD_EOP | CI_TX_DESC_CMD_ICRC;
+ uint64_t rs = CI_TX_DESC_CMD_RS | flags;
if (txq->nb_tx_free < txq->tx_free_thresh)
ci_tx_free_bufs_vec(txq, iavf_tx_desc_done, false);
@@ -2253,8 +2235,7 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)IAVF_TX_DESC_CMD_RS) <<
- IAVF_TXD_QW1_CMD_SHIFT);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
}
@@ -2275,8 +2256,8 @@ iavf_xmit_fixed_burst_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, nb_mbuf, tx_id;
/* bit2 is reserved and must be set to 1 according to Spec */
- uint64_t flags = IAVF_TX_DESC_CMD_EOP | IAVF_TX_DESC_CMD_ICRC;
- uint64_t rs = IAVF_TX_DESC_CMD_RS | flags;
+ uint64_t flags = CI_TX_DESC_CMD_EOP | CI_TX_DESC_CMD_ICRC;
+ uint64_t rs = CI_TX_DESC_CMD_RS | flags;
if (txq->nb_tx_free < txq->tx_free_thresh)
ci_tx_free_bufs_vec(txq, iavf_tx_desc_done, true);
@@ -2321,8 +2302,7 @@ iavf_xmit_fixed_burst_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
if (tx_id > txq->tx_next_rs) {
txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)IAVF_TX_DESC_CMD_RS) <<
- IAVF_TXD_QW1_CMD_SHIFT);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
}
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
index 1832b76f89..1538a44892 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_common.h
@@ -15,8 +15,8 @@ static inline int
iavf_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
{
return (txq->ci_tx_ring[idx].cmd_type_offset_bsz &
- rte_cpu_to_le_64(IAVF_TXD_QW1_DTYPE_MASK)) ==
- rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE);
+ rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) ==
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE);
}
static inline void
@@ -147,26 +147,26 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
/* Set MACLEN */
if (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)
td_offset |= (tx_pkt->outer_l2_len >> 1)
- << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
+ << CI_TX_DESC_LEN_MACLEN_S;
else
td_offset |= (tx_pkt->l2_len >> 1)
- << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
+ << CI_TX_DESC_LEN_MACLEN_S;
/* Enable L3 checksum offloads */
if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
if (ol_flags & RTE_MBUF_F_TX_IPV4) {
- td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
+ td_cmd |= CI_TX_DESC_CMD_IIPT_IPV4_CSUM;
td_offset |= (tx_pkt->l3_len >> 2) <<
- IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+ CI_TX_DESC_LEN_IPLEN_S;
}
} else if (ol_flags & RTE_MBUF_F_TX_IPV4) {
- td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4;
+ td_cmd |= CI_TX_DESC_CMD_IIPT_IPV4;
td_offset |= (tx_pkt->l3_len >> 2) <<
- IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+ CI_TX_DESC_LEN_IPLEN_S;
} else if (ol_flags & RTE_MBUF_F_TX_IPV6) {
- td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV6;
+ td_cmd |= CI_TX_DESC_CMD_IIPT_IPV6;
td_offset |= (tx_pkt->l3_len >> 2) <<
- IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+ CI_TX_DESC_LEN_IPLEN_S;
}
/* Enable L4 checksum offloads */
@@ -190,7 +190,7 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
break;
}
- *txd_hi |= ((uint64_t)td_offset) << IAVF_TXD_QW1_OFFSET_SHIFT;
+ *txd_hi |= ((uint64_t)td_offset) << CI_TXD_QW1_OFFSET_S;
#endif
#ifdef IAVF_TX_VLAN_QINQ_OFFLOAD
@@ -198,17 +198,15 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
td_cmd |= IAVF_TX_DESC_CMD_IL2TAG1;
/* vlan_flag specifies outer tag location for QinQ. */
if (vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1)
- *txd_hi |= ((uint64_t)tx_pkt->vlan_tci_outer <<
- IAVF_TXD_QW1_L2TAG1_SHIFT);
+ *txd_hi |= ((uint64_t)tx_pkt->vlan_tci_outer << CI_TXD_QW1_L2TAG1_S);
else
- *txd_hi |= ((uint64_t)tx_pkt->vlan_tci <<
- IAVF_TXD_QW1_L2TAG1_SHIFT);
+ *txd_hi |= ((uint64_t)tx_pkt->vlan_tci << CI_TXD_QW1_L2TAG1_S);
} else if (ol_flags & RTE_MBUF_F_TX_VLAN && vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1) {
- td_cmd |= IAVF_TX_DESC_CMD_IL2TAG1;
- *txd_hi |= ((uint64_t)tx_pkt->vlan_tci << IAVF_TXD_QW1_L2TAG1_SHIFT);
+ td_cmd |= CI_TX_DESC_CMD_IL2TAG1;
+ *txd_hi |= ((uint64_t)tx_pkt->vlan_tci << CI_TXD_QW1_L2TAG1_S);
}
#endif
- *txd_hi |= ((uint64_t)td_cmd) << IAVF_TXD_QW1_CMD_SHIFT;
+ *txd_hi |= ((uint64_t)td_cmd) << CI_TXD_QW1_CMD_S;
}
#endif
diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
index 89ec05fa5d..7c65ce0873 100644
--- a/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/intel/iavf/iavf_rxtx_vec_sse.c
@@ -1244,11 +1244,9 @@ iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
static inline void
vtx1(volatile struct ci_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
{
- uint64_t high_qw =
- (IAVF_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << IAVF_TXD_QW1_CMD_SHIFT) |
- ((uint64_t)pkt->data_len <<
- IAVF_TXD_QW1_TX_BUF_SZ_SHIFT));
+ uint64_t high_qw = (CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
__m128i descriptor = _mm_set_epi64x(high_qw,
pkt->buf_iova + pkt->data_off);
@@ -1273,8 +1271,8 @@ iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
- uint64_t flags = IAVF_TX_DESC_CMD_EOP | 0x04; /* bit 2 must be set */
- uint64_t rs = IAVF_TX_DESC_CMD_RS | flags;
+ uint64_t flags = CI_TX_DESC_CMD_EOP | 0x04; /* bit 2 must be set */
+ uint64_t rs = CI_TX_DESC_CMD_RS | flags;
int i;
if (txq->nb_tx_free < txq->tx_free_thresh)
@@ -1317,8 +1315,7 @@ iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)IAVF_TX_DESC_CMD_RS) <<
- IAVF_TXD_QW1_CMD_SHIFT);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
}
diff --git a/drivers/net/intel/ice/ice_dcf_ethdev.c b/drivers/net/intel/ice/ice_dcf_ethdev.c
index 5f537b4c12..4ceecc15c6 100644
--- a/drivers/net/intel/ice/ice_dcf_ethdev.c
+++ b/drivers/net/intel/ice/ice_dcf_ethdev.c
@@ -406,7 +406,7 @@ reset_tx_queue(struct ci_tx_queue *txq)
prev = (uint16_t)(txq->nb_tx_desc - 1);
for (i = 0; i < txq->nb_tx_desc; i++) {
txq->ci_tx_ring[i].cmd_type_offset_bsz =
- rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE);
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE);
txe[i].mbuf = NULL;
txe[i].last_id = i;
txe[prev].next_id = i;
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 0a6ca993c6..5864238092 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -1120,7 +1120,7 @@ ice_reset_tx_queue(struct ci_tx_queue *txq)
volatile struct ci_tx_desc *txd = &txq->ci_tx_ring[i];
txd->cmd_type_offset_bsz =
- rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DESC_DONE);
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE);
txe[i].mbuf = NULL;
txe[i].last_id = i;
txe[prev].next_id = i;
@@ -2548,9 +2548,8 @@ ice_tx_descriptor_status(void *tx_queue, uint16_t offset)
}
status = &txq->ci_tx_ring[desc].cmd_type_offset_bsz;
- mask = rte_cpu_to_le_64(ICE_TXD_QW1_DTYPE_M);
- expect = rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DESC_DONE <<
- ICE_TXD_QW1_DTYPE_S);
+ mask = rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M);
+ expect = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE << CI_TXD_QW1_DTYPE_S);
if ((*status & mask) == expect)
return RTE_ETH_TX_DESC_DONE;
@@ -2896,7 +2895,7 @@ ice_recv_pkts(void *rx_queue,
static inline void
ice_parse_tunneling_params(uint64_t ol_flags,
- union ice_tx_offload tx_offload,
+ union ci_tx_offload tx_offload,
uint32_t *cd_tunneling)
{
/* EIPT: External (outer) IP header type */
@@ -2957,58 +2956,58 @@ static inline void
ice_txd_enable_checksum(uint64_t ol_flags,
uint32_t *td_cmd,
uint32_t *td_offset,
- union ice_tx_offload tx_offload)
+ union ci_tx_offload tx_offload)
{
/* Set MACLEN */
if (!(ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK))
*td_offset |= (tx_offload.l2_len >> 1)
- << ICE_TX_DESC_LEN_MACLEN_S;
+ << CI_TX_DESC_LEN_MACLEN_S;
/* Enable L3 checksum offloads */
if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
- *td_cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
+ *td_cmd |= CI_TX_DESC_CMD_IIPT_IPV4_CSUM;
*td_offset |= (tx_offload.l3_len >> 2) <<
- ICE_TX_DESC_LEN_IPLEN_S;
+ CI_TX_DESC_LEN_IPLEN_S;
} else if (ol_flags & RTE_MBUF_F_TX_IPV4) {
- *td_cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;
+ *td_cmd |= CI_TX_DESC_CMD_IIPT_IPV4;
*td_offset |= (tx_offload.l3_len >> 2) <<
- ICE_TX_DESC_LEN_IPLEN_S;
+ CI_TX_DESC_LEN_IPLEN_S;
} else if (ol_flags & RTE_MBUF_F_TX_IPV6) {
- *td_cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
+ *td_cmd |= CI_TX_DESC_CMD_IIPT_IPV6;
*td_offset |= (tx_offload.l3_len >> 2) <<
- ICE_TX_DESC_LEN_IPLEN_S;
+ CI_TX_DESC_LEN_IPLEN_S;
}
if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
- *td_cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
+ *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_TCP;
*td_offset |= (tx_offload.l4_len >> 2) <<
- ICE_TX_DESC_LEN_L4_LEN_S;
+ CI_TX_DESC_LEN_L4_LEN_S;
return;
}
if (ol_flags & RTE_MBUF_F_TX_UDP_SEG) {
- *td_cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
+ *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_UDP;
*td_offset |= (tx_offload.l4_len >> 2) <<
- ICE_TX_DESC_LEN_L4_LEN_S;
+ CI_TX_DESC_LEN_L4_LEN_S;
return;
}
/* Enable L4 checksum offloads */
switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
case RTE_MBUF_F_TX_TCP_CKSUM:
- *td_cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
+ *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_TCP;
*td_offset |= (sizeof(struct rte_tcp_hdr) >> 2) <<
- ICE_TX_DESC_LEN_L4_LEN_S;
+ CI_TX_DESC_LEN_L4_LEN_S;
break;
case RTE_MBUF_F_TX_SCTP_CKSUM:
- *td_cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP;
+ *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_SCTP;
*td_offset |= (sizeof(struct rte_sctp_hdr) >> 2) <<
- ICE_TX_DESC_LEN_L4_LEN_S;
+ CI_TX_DESC_LEN_L4_LEN_S;
break;
case RTE_MBUF_F_TX_UDP_CKSUM:
- *td_cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
+ *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_UDP;
*td_offset |= (sizeof(struct rte_udp_hdr) >> 2) <<
- ICE_TX_DESC_LEN_L4_LEN_S;
+ CI_TX_DESC_LEN_L4_LEN_S;
break;
default:
break;
@@ -3022,11 +3021,11 @@ ice_build_ctob(uint32_t td_cmd,
uint16_t size,
uint32_t td_tag)
{
- return rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DATA |
- ((uint64_t)td_cmd << ICE_TXD_QW1_CMD_S) |
- ((uint64_t)td_offset << ICE_TXD_QW1_OFFSET_S) |
- ((uint64_t)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
- ((uint64_t)td_tag << ICE_TXD_QW1_L2TAG1_S));
+ return rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
+ ((uint64_t)size << CI_TXD_QW1_TX_BUF_SZ_S) |
+ ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
}
/* Check if the context descriptor is needed for TX offloading */
@@ -3045,7 +3044,7 @@ ice_calc_context_desc(uint64_t flags)
/* set ice TSO context descriptor */
static inline uint64_t
-ice_set_tso_ctx(struct rte_mbuf *mbuf, union ice_tx_offload tx_offload)
+ice_set_tso_ctx(struct rte_mbuf *mbuf, union ci_tx_offload tx_offload)
{
uint64_t ctx_desc = 0;
uint32_t cd_cmd, hdr_len, cd_tso_len;
@@ -3059,18 +3058,15 @@ ice_set_tso_ctx(struct rte_mbuf *mbuf, union ice_tx_offload tx_offload)
hdr_len += (mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) ?
tx_offload.outer_l2_len + tx_offload.outer_l3_len : 0;
- cd_cmd = ICE_TX_CTX_DESC_TSO;
+ cd_cmd = CI_TX_CTX_DESC_TSO;
cd_tso_len = mbuf->pkt_len - hdr_len;
- ctx_desc |= ((uint64_t)cd_cmd << ICE_TXD_CTX_QW1_CMD_S) |
+ ctx_desc |= ((uint64_t)cd_cmd << CI_TXD_QW1_CMD_S) |
((uint64_t)cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
((uint64_t)mbuf->tso_segsz << ICE_TXD_CTX_QW1_MSS_S);
return ctx_desc;
}
-/* HW requires that TX buffer size ranges from 1B up to (16K-1)B. */
-#define ICE_MAX_DATA_PER_TXD \
- (ICE_TXD_QW1_TX_BUF_SZ_M >> ICE_TXD_QW1_TX_BUF_SZ_S)
/* Calculate the number of TX descriptors needed for each pkt */
static inline uint16_t
ice_calc_pkt_desc(struct rte_mbuf *tx_pkt)
@@ -3079,7 +3075,7 @@ ice_calc_pkt_desc(struct rte_mbuf *tx_pkt)
uint16_t count = 0;
while (txd != NULL) {
- count += DIV_ROUND_UP(txd->data_len, ICE_MAX_DATA_PER_TXD);
+ count += DIV_ROUND_UP(txd->data_len, CI_MAX_DATA_PER_TXD);
txd = txd->next;
}
@@ -3109,7 +3105,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
uint16_t slen;
uint64_t buf_dma_addr;
uint64_t ol_flags;
- union ice_tx_offload tx_offload = {0};
+ union ci_tx_offload tx_offload = {0};
txq = tx_queue;
sw_ring = txq->sw_ring;
@@ -3177,7 +3173,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
/* Descriptor based VLAN insertion */
if (ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
- td_cmd |= ICE_TX_DESC_CMD_IL2TAG1;
+ td_cmd |= CI_TX_DESC_CMD_IL2TAG1;
td_tag = tx_pkt->vlan_tci;
}
@@ -3185,7 +3181,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
cd_tunneling_params = 0;
if (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
td_offset |= (tx_offload.outer_l2_len >> 1)
- << ICE_TX_DESC_LEN_MACLEN_S;
+ << CI_TX_DESC_LEN_MACLEN_S;
ice_parse_tunneling_params(ol_flags, tx_offload,
&cd_tunneling_params);
}
@@ -3215,8 +3211,8 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
ice_set_tso_ctx(tx_pkt, tx_offload);
else if (ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST)
cd_type_cmd_tso_mss |=
- ((uint64_t)ICE_TX_CTX_DESC_TSYN <<
- ICE_TXD_CTX_QW1_CMD_S) |
+ ((uint64_t)CI_TX_CTX_DESC_TSYN <<
+ CI_TXD_QW1_CMD_S) |
(((uint64_t)txq->ice_vsi->adapter->ptp_tx_index <<
ICE_TXD_CTX_QW1_TSYN_S) & ICE_TXD_CTX_QW1_TSYN_M);
@@ -3227,8 +3223,8 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
if (ol_flags & RTE_MBUF_F_TX_QINQ) {
cd_l2tag2 = tx_pkt->vlan_tci_outer;
cd_type_cmd_tso_mss |=
- ((uint64_t)ICE_TX_CTX_DESC_IL2TAG2 <<
- ICE_TXD_CTX_QW1_CMD_S);
+ ((uint64_t)CI_TX_CTX_DESC_IL2TAG2 <<
+ CI_TXD_QW1_CMD_S);
}
ctx_txd->l2tag2 = rte_cpu_to_le_16(cd_l2tag2);
ctx_txd->qw1 =
@@ -3253,18 +3249,16 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
buf_dma_addr = rte_mbuf_data_iova(m_seg);
while ((ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) &&
- unlikely(slen > ICE_MAX_DATA_PER_TXD)) {
+ unlikely(slen > CI_MAX_DATA_PER_TXD)) {
txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
- txd->cmd_type_offset_bsz =
- rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DATA |
- ((uint64_t)td_cmd << ICE_TXD_QW1_CMD_S) |
- ((uint64_t)td_offset << ICE_TXD_QW1_OFFSET_S) |
- ((uint64_t)ICE_MAX_DATA_PER_TXD <<
- ICE_TXD_QW1_TX_BUF_SZ_S) |
- ((uint64_t)td_tag << ICE_TXD_QW1_L2TAG1_S));
+ txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
+ ((uint64_t)CI_MAX_DATA_PER_TXD << CI_TXD_QW1_TX_BUF_SZ_S) |
+ ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
- buf_dma_addr += ICE_MAX_DATA_PER_TXD;
- slen -= ICE_MAX_DATA_PER_TXD;
+ buf_dma_addr += CI_MAX_DATA_PER_TXD;
+ slen -= CI_MAX_DATA_PER_TXD;
txe->last_id = tx_last;
tx_id = txe->next_id;
@@ -3274,12 +3268,11 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
}
txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
- txd->cmd_type_offset_bsz =
- rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DATA |
- ((uint64_t)td_cmd << ICE_TXD_QW1_CMD_S) |
- ((uint64_t)td_offset << ICE_TXD_QW1_OFFSET_S) |
- ((uint64_t)slen << ICE_TXD_QW1_TX_BUF_SZ_S) |
- ((uint64_t)td_tag << ICE_TXD_QW1_L2TAG1_S));
+ txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
+ ((uint64_t)slen << CI_TXD_QW1_TX_BUF_SZ_S) |
+ ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
txe->last_id = tx_last;
tx_id = txe->next_id;
@@ -3288,7 +3281,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
} while (m_seg);
/* fill the last descriptor with End of Packet (EOP) bit */
- td_cmd |= ICE_TX_DESC_CMD_EOP;
+ td_cmd |= CI_TX_DESC_CMD_EOP;
txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
@@ -3299,14 +3292,13 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
"%4u (port=%d queue=%d)",
tx_last, txq->port_id, txq->queue_id);
- td_cmd |= ICE_TX_DESC_CMD_RS;
+ td_cmd |= CI_TX_DESC_CMD_RS;
/* Update txq RS bit counters */
txq->nb_tx_used = 0;
}
txd->cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)td_cmd) <<
- ICE_TXD_QW1_CMD_S);
+ rte_cpu_to_le_64(((uint64_t)td_cmd) << CI_TXD_QW1_CMD_S);
if (txq->tsq != NULL && txq->tsq->ts_flag > 0) {
uint64_t txtime = *RTE_MBUF_DYNFIELD(tx_pkt,
@@ -3353,8 +3345,8 @@ ice_tx_free_bufs(struct ci_tx_queue *txq)
uint16_t i;
if ((txq->ci_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
- rte_cpu_to_le_64(ICE_TXD_QW1_DTYPE_M)) !=
- rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DESC_DONE))
+ rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE))
return 0;
txep = &txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)];
@@ -3579,8 +3571,7 @@ tx_xmit_pkts(struct ci_tx_queue *txq,
n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
ice_tx_fill_hw_ring(txq, tx_pkts, n);
txr[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)ICE_TX_DESC_CMD_RS) <<
- ICE_TXD_QW1_CMD_S);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
txq->tx_tail = 0;
}
@@ -3592,8 +3583,7 @@ tx_xmit_pkts(struct ci_tx_queue *txq,
/* Determine if RS bit needs to be set */
if (txq->tx_tail > txq->tx_next_rs) {
txr[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)ICE_TX_DESC_CMD_RS) <<
- ICE_TXD_QW1_CMD_S);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
if (txq->tx_next_rs >= txq->nb_tx_desc)
@@ -4852,9 +4842,9 @@ ice_fdir_programming(struct ice_pf *pf, struct ice_fltr_desc *fdir_desc)
txdp = &txq->ci_tx_ring[txq->tx_tail + 1];
txdp->buffer_addr = rte_cpu_to_le_64(pf->fdir.dma_addr);
- td_cmd = ICE_TX_DESC_CMD_EOP |
- ICE_TX_DESC_CMD_RS |
- ICE_TX_DESC_CMD_DUMMY;
+ td_cmd = CI_TX_DESC_CMD_EOP |
+ CI_TX_DESC_CMD_RS |
+ CI_TX_DESC_CMD_DUMMY;
txdp->cmd_type_offset_bsz =
ice_build_ctob(td_cmd, 0, ICE_FDIR_PKT_LEN, 0);
@@ -4865,9 +4855,8 @@ ice_fdir_programming(struct ice_pf *pf, struct ice_fltr_desc *fdir_desc)
/* Update the tx tail register */
ICE_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
for (i = 0; i < ICE_FDIR_MAX_WAIT_US; i++) {
- if ((txdp->cmd_type_offset_bsz &
- rte_cpu_to_le_64(ICE_TXD_QW1_DTYPE_M)) ==
- rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DESC_DONE))
+ if ((txdp->cmd_type_offset_bsz & rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) ==
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE))
break;
rte_delay_us(1);
}
diff --git a/drivers/net/intel/ice/ice_rxtx.h b/drivers/net/intel/ice/ice_rxtx.h
index d7e8c1b0c4..3462196f6f 100644
--- a/drivers/net/intel/ice/ice_rxtx.h
+++ b/drivers/net/intel/ice/ice_rxtx.h
@@ -46,7 +46,7 @@
#define ICE_SUPPORT_CHAIN_NUM 5
-#define ICE_TD_CMD ICE_TX_DESC_CMD_EOP
+#define ICE_TD_CMD CI_TX_DESC_CMD_EOP
#define ICE_VPMD_RX_BURST CI_VPMD_RX_BURST
#define ICE_VPMD_TX_BURST 32
@@ -169,19 +169,6 @@ struct ice_txtime {
const struct rte_memzone *ts_mz;
};
-/* Offload features */
-union ice_tx_offload {
- uint64_t data;
- struct {
- uint64_t l2_len:7; /* L2 (MAC) Header Length. */
- uint64_t l3_len:9; /* L3 (IP) Header Length. */
- uint64_t l4_len:8; /* L4 Header Length. */
- uint64_t tso_segsz:16; /* TCP TSO segment size */
- uint64_t outer_l2_len:8; /* outer L2 Header Length */
- uint64_t outer_l3_len:16; /* outer L3 Header Length */
- };
-};
-
/* Rx Flex Descriptor for Comms Package Profile
* RxDID Profile ID 22 (swap Hash and FlowID)
* Flex-field 0: Flow ID lower 16-bits
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
index d553c438f8..d0237a0c82 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx2.c
@@ -777,10 +777,9 @@ static __rte_always_inline void
ice_vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags, bool offload)
{
- uint64_t high_qw =
- (ICE_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << ICE_TXD_QW1_CMD_S) |
- ((uint64_t)pkt->data_len << ICE_TXD_QW1_TX_BUF_SZ_S));
+ uint64_t high_qw = (CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
if (offload)
ice_txd_enable_offload(pkt, &high_qw);
@@ -792,8 +791,7 @@ static __rte_always_inline void
ice_vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags, bool offload)
{
- const uint64_t hi_qw_tmpl = (ICE_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << ICE_TXD_QW1_CMD_S));
+ const uint64_t hi_qw_tmpl = (CI_TX_DESC_DTYPE_DATA | (flags << CI_TXD_QW1_CMD_S));
/* if unaligned on 32-bit boundary, do one to align */
if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
@@ -801,30 +799,22 @@ ice_vtx(volatile struct ci_tx_desc *txdp,
nb_pkts--, txdp++, pkt++;
}
- /* do two at a time while possible, in bursts */
+ /* do four at a time while possible, in bursts */
for (; nb_pkts > 3; txdp += 4, pkt += 4, nb_pkts -= 4) {
- uint64_t hi_qw3 =
- hi_qw_tmpl |
- ((uint64_t)pkt[3]->data_len <<
- ICE_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw3 = hi_qw_tmpl |
+ ((uint64_t)pkt[3]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
if (offload)
ice_txd_enable_offload(pkt[3], &hi_qw3);
- uint64_t hi_qw2 =
- hi_qw_tmpl |
- ((uint64_t)pkt[2]->data_len <<
- ICE_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw2 = hi_qw_tmpl |
+ ((uint64_t)pkt[2]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
if (offload)
ice_txd_enable_offload(pkt[2], &hi_qw2);
- uint64_t hi_qw1 =
- hi_qw_tmpl |
- ((uint64_t)pkt[1]->data_len <<
- ICE_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw1 = hi_qw_tmpl |
+ ((uint64_t)pkt[1]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
if (offload)
ice_txd_enable_offload(pkt[1], &hi_qw1);
- uint64_t hi_qw0 =
- hi_qw_tmpl |
- ((uint64_t)pkt[0]->data_len <<
- ICE_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw0 = hi_qw_tmpl |
+ ((uint64_t)pkt[0]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
if (offload)
ice_txd_enable_offload(pkt[0], &hi_qw0);
@@ -856,7 +846,7 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = ICE_TD_CMD;
- uint64_t rs = ICE_TX_DESC_CMD_RS | ICE_TD_CMD;
+ uint64_t rs = CI_TX_DESC_CMD_RS | ICE_TD_CMD;
/* cross rx_thresh boundary is not allowed */
nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
@@ -901,8 +891,7 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)ICE_TX_DESC_CMD_RS) <<
- ICE_TXD_QW1_CMD_S);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
}
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
index d42f41461f..9ef0777b9b 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_avx512.c
@@ -850,10 +850,9 @@ static __rte_always_inline void
ice_vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags, bool do_offload)
{
- uint64_t high_qw =
- (ICE_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << ICE_TXD_QW1_CMD_S) |
- ((uint64_t)pkt->data_len << ICE_TXD_QW1_TX_BUF_SZ_S));
+ uint64_t high_qw = (CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
if (do_offload)
ice_txd_enable_offload(pkt, &high_qw);
@@ -866,32 +865,23 @@ static __rte_always_inline void
ice_vtx(volatile struct ci_tx_desc *txdp, struct rte_mbuf **pkt,
uint16_t nb_pkts, uint64_t flags, bool do_offload)
{
- const uint64_t hi_qw_tmpl = (ICE_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << ICE_TXD_QW1_CMD_S));
+ const uint64_t hi_qw_tmpl = (CI_TX_DESC_DTYPE_DATA | (flags << CI_TXD_QW1_CMD_S));
for (; nb_pkts > 3; txdp += 4, pkt += 4, nb_pkts -= 4) {
- uint64_t hi_qw3 =
- hi_qw_tmpl |
- ((uint64_t)pkt[3]->data_len <<
- ICE_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw3 = hi_qw_tmpl |
+ ((uint64_t)pkt[3]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
if (do_offload)
ice_txd_enable_offload(pkt[3], &hi_qw3);
- uint64_t hi_qw2 =
- hi_qw_tmpl |
- ((uint64_t)pkt[2]->data_len <<
- ICE_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw2 = hi_qw_tmpl |
+ ((uint64_t)pkt[2]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
if (do_offload)
ice_txd_enable_offload(pkt[2], &hi_qw2);
- uint64_t hi_qw1 =
- hi_qw_tmpl |
- ((uint64_t)pkt[1]->data_len <<
- ICE_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw1 = hi_qw_tmpl |
+ ((uint64_t)pkt[1]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
if (do_offload)
ice_txd_enable_offload(pkt[1], &hi_qw1);
- uint64_t hi_qw0 =
- hi_qw_tmpl |
- ((uint64_t)pkt[0]->data_len <<
- ICE_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw0 = hi_qw_tmpl |
+ ((uint64_t)pkt[0]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
if (do_offload)
ice_txd_enable_offload(pkt[0], &hi_qw0);
@@ -920,7 +910,7 @@ ice_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = ICE_TD_CMD;
- uint64_t rs = ICE_TX_DESC_CMD_RS | ICE_TD_CMD;
+ uint64_t rs = CI_TX_DESC_CMD_RS | ICE_TD_CMD;
/* cross rx_thresh boundary is not allowed */
nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
@@ -966,8 +956,7 @@ ice_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)ICE_TX_DESC_CMD_RS) <<
- ICE_TXD_QW1_CMD_S);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
}
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_common.h b/drivers/net/intel/ice/ice_rxtx_vec_common.h
index 8ba591e403..1d83a087cc 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_common.h
+++ b/drivers/net/intel/ice/ice_rxtx_vec_common.h
@@ -12,8 +12,8 @@ static inline int
ice_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
{
return (txq->ci_tx_ring[idx].cmd_type_offset_bsz &
- rte_cpu_to_le_64(ICE_TXD_QW1_DTYPE_M)) ==
- rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DESC_DONE);
+ rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) ==
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE);
}
static inline void
@@ -124,53 +124,52 @@ ice_txd_enable_offload(struct rte_mbuf *tx_pkt,
/* Tx Checksum Offload */
/* SET MACLEN */
td_offset |= (tx_pkt->l2_len >> 1) <<
- ICE_TX_DESC_LEN_MACLEN_S;
+ CI_TX_DESC_LEN_MACLEN_S;
/* Enable L3 checksum offload */
if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
- td_cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
+ td_cmd |= CI_TX_DESC_CMD_IIPT_IPV4_CSUM;
td_offset |= (tx_pkt->l3_len >> 2) <<
- ICE_TX_DESC_LEN_IPLEN_S;
+ CI_TX_DESC_LEN_IPLEN_S;
} else if (ol_flags & RTE_MBUF_F_TX_IPV4) {
- td_cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;
+ td_cmd |= CI_TX_DESC_CMD_IIPT_IPV4;
td_offset |= (tx_pkt->l3_len >> 2) <<
- ICE_TX_DESC_LEN_IPLEN_S;
+ CI_TX_DESC_LEN_IPLEN_S;
} else if (ol_flags & RTE_MBUF_F_TX_IPV6) {
- td_cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
+ td_cmd |= CI_TX_DESC_CMD_IIPT_IPV6;
td_offset |= (tx_pkt->l3_len >> 2) <<
- ICE_TX_DESC_LEN_IPLEN_S;
+ CI_TX_DESC_LEN_IPLEN_S;
}
/* Enable L4 checksum offloads */
switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
case RTE_MBUF_F_TX_TCP_CKSUM:
- td_cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
+ td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_TCP;
td_offset |= (sizeof(struct rte_tcp_hdr) >> 2) <<
- ICE_TX_DESC_LEN_L4_LEN_S;
+ CI_TX_DESC_LEN_L4_LEN_S;
break;
case RTE_MBUF_F_TX_SCTP_CKSUM:
- td_cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP;
+ td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_SCTP;
td_offset |= (sizeof(struct rte_sctp_hdr) >> 2) <<
- ICE_TX_DESC_LEN_L4_LEN_S;
+ CI_TX_DESC_LEN_L4_LEN_S;
break;
case RTE_MBUF_F_TX_UDP_CKSUM:
- td_cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
+ td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_UDP;
td_offset |= (sizeof(struct rte_udp_hdr) >> 2) <<
- ICE_TX_DESC_LEN_L4_LEN_S;
+ CI_TX_DESC_LEN_L4_LEN_S;
break;
default:
break;
}
- *txd_hi |= ((uint64_t)td_offset) << ICE_TXD_QW1_OFFSET_S;
+ *txd_hi |= ((uint64_t)td_offset) << CI_TXD_QW1_OFFSET_S;
- /* Tx VLAN insertion Offload */
+ /* Tx VLAN/QINQ insertion Offload */
if (ol_flags & RTE_MBUF_F_TX_VLAN) {
- td_cmd |= ICE_TX_DESC_CMD_IL2TAG1;
- *txd_hi |= ((uint64_t)tx_pkt->vlan_tci <<
- ICE_TXD_QW1_L2TAG1_S);
+ td_cmd |= CI_TX_DESC_CMD_IL2TAG1;
+ *txd_hi |= ((uint64_t)tx_pkt->vlan_tci << CI_TXD_QW1_L2TAG1_S);
}
- *txd_hi |= ((uint64_t)td_cmd) << ICE_TXD_QW1_CMD_S;
+ *txd_hi |= ((uint64_t)td_cmd) << CI_TXD_QW1_CMD_S;
}
#endif
diff --git a/drivers/net/intel/ice/ice_rxtx_vec_sse.c b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
index c65240d659..c4920a1360 100644
--- a/drivers/net/intel/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/intel/ice/ice_rxtx_vec_sse.c
@@ -599,10 +599,9 @@ static inline void
ice_vtx1(volatile struct ci_tx_desc *txdp, struct rte_mbuf *pkt,
uint64_t flags)
{
- uint64_t high_qw =
- (ICE_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << ICE_TXD_QW1_CMD_S) |
- ((uint64_t)pkt->data_len << ICE_TXD_QW1_TX_BUF_SZ_S));
+ uint64_t high_qw = (CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
__m128i descriptor = _mm_set_epi64x(high_qw, rte_pktmbuf_iova(pkt));
_mm_store_si128(RTE_CAST_PTR(__m128i *, txdp), descriptor);
@@ -627,7 +626,7 @@ ice_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
uint64_t flags = ICE_TD_CMD;
- uint64_t rs = ICE_TX_DESC_CMD_RS | ICE_TD_CMD;
+ uint64_t rs = CI_TX_DESC_CMD_RS | ICE_TD_CMD;
int i;
/* cross rx_thresh boundary is not allowed */
@@ -673,8 +672,7 @@ ice_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)ICE_TX_DESC_CMD_RS) <<
- ICE_TXD_QW1_CMD_S);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
}
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.c b/drivers/net/intel/idpf/idpf_common_rxtx.c
index 23666539ab..587871b54a 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.c
@@ -271,7 +271,7 @@ idpf_qc_single_tx_queue_reset(struct ci_tx_queue *txq)
prev = (uint16_t)(txq->nb_tx_desc - 1);
for (i = 0; i < txq->nb_tx_desc; i++) {
txq->ci_tx_ring[i].cmd_type_offset_bsz =
- rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE);
txe[i].mbuf = NULL;
txe[i].last_id = i;
txe[prev].next_id = i;
@@ -849,7 +849,7 @@ idpf_calc_context_desc(uint64_t flags)
*/
static inline void
idpf_set_splitq_tso_ctx(struct rte_mbuf *mbuf,
- union idpf_tx_offload tx_offload,
+ union ci_tx_offload tx_offload,
volatile union idpf_flex_tx_ctx_desc *ctx_desc)
{
uint16_t cmd_dtype;
@@ -887,7 +887,7 @@ idpf_dp_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
volatile struct idpf_flex_tx_sched_desc *txr;
volatile struct idpf_flex_tx_sched_desc *txd;
struct ci_tx_entry *sw_ring;
- union idpf_tx_offload tx_offload = {0};
+ union ci_tx_offload tx_offload = {0};
struct ci_tx_entry *txe, *txn;
uint16_t nb_used, tx_id, sw_id;
struct rte_mbuf *tx_pkt;
@@ -1334,7 +1334,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
{
volatile struct ci_tx_desc *txd;
volatile struct ci_tx_desc *txr;
- union idpf_tx_offload tx_offload = {0};
+ union ci_tx_offload tx_offload = {0};
struct ci_tx_entry *txe, *txn;
struct ci_tx_entry *sw_ring;
struct ci_tx_queue *txq;
@@ -1452,10 +1452,10 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
slen = m_seg->data_len;
buf_dma_addr = rte_mbuf_data_iova(m_seg);
txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
- txd->cmd_type_offset_bsz = rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DATA |
- ((uint64_t)td_cmd << IDPF_TXD_QW1_CMD_S) |
- ((uint64_t)td_offset << IDPF_TXD_QW1_OFFSET_S) |
- ((uint64_t)slen << IDPF_TXD_QW1_TX_BUF_SZ_S));
+ txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
+ ((uint64_t)slen << CI_TXD_QW1_TX_BUF_SZ_S));
txe->last_id = tx_last;
tx_id = txe->next_id;
@@ -1464,7 +1464,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
} while (m_seg);
/* The last packet data descriptor needs End Of Packet (EOP) */
- td_cmd |= IDPF_TX_DESC_CMD_EOP;
+ td_cmd |= CI_TX_DESC_CMD_EOP;
txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
@@ -1473,13 +1473,13 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
"%4u (port=%d queue=%d)",
tx_last, txq->port_id, txq->queue_id);
- td_cmd |= IDPF_TX_DESC_CMD_RS;
+ td_cmd |= CI_TX_DESC_CMD_RS;
/* Update txq RS bit counters */
txq->nb_tx_used = 0;
}
- txd->cmd_type_offset_bsz |= rte_cpu_to_le_16(td_cmd << IDPF_TXD_QW1_CMD_S);
+ txd->cmd_type_offset_bsz |= rte_cpu_to_le_16(td_cmd << CI_TXD_QW1_CMD_S);
}
end_of_tx:
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.h b/drivers/net/intel/idpf/idpf_common_rxtx.h
index 2f2fa153b2..b88a87402d 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.h
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.h
@@ -169,18 +169,6 @@ struct idpf_rx_queue {
uint32_t hw_register_set;
};
-/* Offload features */
-union idpf_tx_offload {
- uint64_t data;
- struct {
- uint64_t l2_len:7; /* L2 (MAC) Header Length. */
- uint64_t l3_len:9; /* L3 (IP) Header Length. */
- uint64_t l4_len:8; /* L4 Header Length. */
- uint64_t tso_segsz:16; /* TCP TSO segment size */
- /* uint64_t unused : 24; */
- };
-};
-
union idpf_tx_desc {
struct ci_tx_desc *tx_ring;
struct idpf_flex_tx_sched_desc *desc_ring;
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c b/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c
index 04efee3722..b6bf7fca76 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c
@@ -486,10 +486,9 @@ static inline void
idpf_singleq_vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
{
- uint64_t high_qw =
- (IDPF_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << IDPF_TXD_QW1_CMD_S) |
- ((uint64_t)pkt->data_len << IDPF_TXD_QW1_TX_BUF_SZ_S));
+ uint64_t high_qw = (CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
__m128i descriptor = _mm_set_epi64x(high_qw,
pkt->buf_iova + pkt->data_off);
@@ -500,8 +499,7 @@ static inline void
idpf_singleq_vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
- const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << IDPF_TXD_QW1_CMD_S));
+ const uint64_t hi_qw_tmpl = (CI_TX_DESC_DTYPE_DATA | (flags << CI_TXD_QW1_CMD_S));
/* if unaligned on 32-bit boundary, do one to align */
if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
@@ -511,22 +509,14 @@ idpf_singleq_vtx(volatile struct ci_tx_desc *txdp,
/* do two at a time while possible, in bursts */
for (; nb_pkts > 3; txdp += 4, pkt += 4, nb_pkts -= 4) {
- uint64_t hi_qw3 =
- hi_qw_tmpl |
- ((uint64_t)pkt[3]->data_len <<
- IDPF_TXD_QW1_TX_BUF_SZ_S);
- uint64_t hi_qw2 =
- hi_qw_tmpl |
- ((uint64_t)pkt[2]->data_len <<
- IDPF_TXD_QW1_TX_BUF_SZ_S);
- uint64_t hi_qw1 =
- hi_qw_tmpl |
- ((uint64_t)pkt[1]->data_len <<
- IDPF_TXD_QW1_TX_BUF_SZ_S);
- uint64_t hi_qw0 =
- hi_qw_tmpl |
- ((uint64_t)pkt[0]->data_len <<
- IDPF_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw3 = hi_qw_tmpl |
+ ((uint64_t)pkt[3]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw2 = hi_qw_tmpl |
+ ((uint64_t)pkt[2]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw1 = hi_qw_tmpl |
+ ((uint64_t)pkt[1]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw0 = hi_qw_tmpl |
+ ((uint64_t)pkt[0]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
__m256i desc2_3 =
_mm256_set_epi64x
@@ -559,8 +549,8 @@ idpf_singleq_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts
volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
- uint64_t flags = IDPF_TX_DESC_CMD_EOP;
- uint64_t rs = IDPF_TX_DESC_CMD_RS | flags;
+ uint64_t flags = CI_TX_DESC_CMD_EOP;
+ uint64_t rs = CI_TX_DESC_CMD_RS | flags;
/* cross rx_thresh boundary is not allowed */
nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
@@ -605,8 +595,7 @@ idpf_singleq_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
- IDPF_TXD_QW1_CMD_S);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
}
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c b/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c
index d5e5a2ca5f..fcdec3a4d5 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c
@@ -1003,10 +1003,9 @@ static __rte_always_inline void
idpf_singleq_vtx1(volatile struct ci_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
{
- uint64_t high_qw =
- (IDPF_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << IDPF_TXD_QW1_CMD_S) |
- ((uint64_t)pkt->data_len << IDPF_TXD_QW1_TX_BUF_SZ_S));
+ uint64_t high_qw = (CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
__m128i descriptor = _mm_set_epi64x(high_qw,
pkt->buf_iova + pkt->data_off);
@@ -1019,8 +1018,7 @@ static __rte_always_inline void
idpf_singleq_vtx(volatile struct ci_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
- const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA |
- ((uint64_t)flags << IDPF_TXD_QW1_CMD_S));
+ const uint64_t hi_qw_tmpl = (CI_TX_DESC_DTYPE_DATA | (flags << CI_TXD_QW1_CMD_S));
/* if unaligned on 32-bit boundary, do one to align */
if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
@@ -1030,22 +1028,14 @@ idpf_singleq_vtx(volatile struct ci_tx_desc *txdp,
/* do 4 at a time while possible, in bursts */
for (; nb_pkts > 3; txdp += 4, pkt += 4, nb_pkts -= 4) {
- uint64_t hi_qw3 =
- hi_qw_tmpl |
- ((uint64_t)pkt[3]->data_len <<
- IDPF_TXD_QW1_TX_BUF_SZ_S);
- uint64_t hi_qw2 =
- hi_qw_tmpl |
- ((uint64_t)pkt[2]->data_len <<
- IDPF_TXD_QW1_TX_BUF_SZ_S);
- uint64_t hi_qw1 =
- hi_qw_tmpl |
- ((uint64_t)pkt[1]->data_len <<
- IDPF_TXD_QW1_TX_BUF_SZ_S);
- uint64_t hi_qw0 =
- hi_qw_tmpl |
- ((uint64_t)pkt[0]->data_len <<
- IDPF_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw3 = hi_qw_tmpl |
+ ((uint64_t)pkt[3]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw2 = hi_qw_tmpl |
+ ((uint64_t)pkt[2]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw1 = hi_qw_tmpl |
+ ((uint64_t)pkt[1]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
+ uint64_t hi_qw0 = hi_qw_tmpl |
+ ((uint64_t)pkt[0]->data_len << CI_TXD_QW1_TX_BUF_SZ_S);
__m512i desc0_3 =
_mm512_set_epi64
@@ -1075,8 +1065,8 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
volatile struct ci_tx_desc *txdp;
struct ci_tx_entry_vec *txep;
uint16_t n, nb_commit, tx_id;
- uint64_t flags = IDPF_TX_DESC_CMD_EOP;
- uint64_t rs = IDPF_TX_DESC_CMD_RS | flags;
+ uint64_t flags = CI_TX_DESC_CMD_EOP;
+ uint64_t rs = CI_TX_DESC_CMD_RS | flags;
/* cross rx_thresh boundary is not allowed */
nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
@@ -1124,8 +1114,7 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
- IDPF_TXD_QW1_CMD_S);
+ rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) << CI_TXD_QW1_CMD_S);
txq->tx_next_rs =
(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
}
diff --git a/drivers/net/intel/idpf/idpf_rxtx_vec_common.h b/drivers/net/intel/idpf/idpf_rxtx_vec_common.h
index b5e8574667..a43d8f78e2 100644
--- a/drivers/net/intel/idpf/idpf_rxtx_vec_common.h
+++ b/drivers/net/intel/idpf/idpf_rxtx_vec_common.h
@@ -32,8 +32,8 @@ idpf_tx_desc_done(struct ci_tx_queue *txq, uint16_t idx)
return 1;
return (txq->ci_tx_ring[idx].cmd_type_offset_bsz &
- rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) ==
- rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
+ rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) ==
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE);
}
static inline int
--
2.51.0
* [RFC PATCH 05/27] net/intel: create separate header for Tx scalar fns
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (3 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 04/27] net/intel: consolidate definitions for Tx desc fields Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 06/27] net/intel: add common fn to calculate needed descriptors Bruce Richardson
` (21 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson
Rather than having all Tx code in one file, which could grow rather
long, move the scalar datapath functions to a new header file.
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx.h | 58 ++------------------
drivers/net/intel/common/tx_scalar_fns.h | 67 ++++++++++++++++++++++++
2 files changed, 72 insertions(+), 53 deletions(-)
create mode 100644 drivers/net/intel/common/tx_scalar_fns.h
diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index 3d3d9ad8e3..320ab0b8e0 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -309,59 +309,6 @@ ci_tx_free_bufs_vec(struct ci_tx_queue *txq, ci_desc_done_fn desc_done, bool ctx
return txq->tx_rs_thresh;
}
-/*
- * Common transmit descriptor cleanup function for Intel drivers.
- * Used by ice, i40e, iavf, and idpf drivers.
- *
- * Returns:
- * 0 on success
- * -1 if cleanup cannot proceed (descriptors not yet processed by HW)
- */
-static __rte_always_inline int
-ci_tx_xmit_cleanup(struct ci_tx_queue *txq)
-{
- struct ci_tx_entry *sw_ring = txq->sw_ring;
- volatile struct ci_tx_desc *txd = txq->ci_tx_ring;
- uint16_t last_desc_cleaned = txq->last_desc_cleaned;
- uint16_t nb_tx_desc = txq->nb_tx_desc;
- uint16_t desc_to_clean_to;
- uint16_t nb_tx_to_clean;
-
- /* Determine the last descriptor needing to be cleaned */
- desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
- if (desc_to_clean_to >= nb_tx_desc)
- desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
-
- /* Check to make sure the last descriptor to clean is done */
- desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
-
- /* Check if descriptor is done - all drivers use 0xF as done value in bits 3:0 */
- if ((txd[desc_to_clean_to].cmd_type_offset_bsz & rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
- rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE)) {
- /* Descriptor not yet processed by hardware */
- return -1;
- }
-
- /* Figure out how many descriptors will be cleaned */
- if (last_desc_cleaned > desc_to_clean_to)
- nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) + desc_to_clean_to);
- else
- nb_tx_to_clean = (uint16_t)(desc_to_clean_to - last_desc_cleaned);
-
- /* The last descriptor to clean is done, so that means all the
- * descriptors from the last descriptor that was cleaned
- * up to the last descriptor with the RS bit set
- * are done. Only reset the threshold descriptor.
- */
- txd[desc_to_clean_to].cmd_type_offset_bsz = 0;
-
- /* Update the txq to reflect the last descriptor that was cleaned */
- txq->last_desc_cleaned = desc_to_clean_to;
- txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
-
- return 0;
-}
-
static inline void
ci_txq_release_all_mbufs(struct ci_tx_queue *txq, bool use_ctx)
{
@@ -480,4 +427,9 @@ ci_tx_path_select(const struct ci_tx_path_features *req_features,
return idx;
}
+/* include the scalar functions at the end, so they can use the common definitions.
+ * This is done so drivers can use all functions just by including tx.h
+ */
+#include "tx_scalar_fns.h"
+
#endif /* _COMMON_INTEL_TX_H_ */
diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
new file mode 100644
index 0000000000..c79210d084
--- /dev/null
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2025 Intel Corporation
+ */
+
+#ifndef _COMMON_INTEL_TX_SCALAR_FNS_H_
+#define _COMMON_INTEL_TX_SCALAR_FNS_H_
+
+#include <stdint.h>
+#include <rte_byteorder.h>
+
+/* depends on common Tx definitions. */
+#include "tx.h"
+
+/*
+ * Common transmit descriptor cleanup function for Intel drivers.
+ * Used by ice, i40e, iavf, and idpf drivers.
+ *
+ * Returns:
+ * 0 on success
+ * -1 if cleanup cannot proceed (descriptors not yet processed by HW)
+ */
+static __rte_always_inline int
+ci_tx_xmit_cleanup(struct ci_tx_queue *txq)
+{
+ struct ci_tx_entry *sw_ring = txq->sw_ring;
+ volatile struct ci_tx_desc *txd = txq->ci_tx_ring;
+ uint16_t last_desc_cleaned = txq->last_desc_cleaned;
+ uint16_t nb_tx_desc = txq->nb_tx_desc;
+ uint16_t desc_to_clean_to;
+ uint16_t nb_tx_to_clean;
+
+ /* Determine the last descriptor needing to be cleaned */
+ desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
+ if (desc_to_clean_to >= nb_tx_desc)
+ desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
+
+ /* Check to make sure the last descriptor to clean is done */
+ desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
+
+ /* Check if descriptor is done - all drivers use 0xF as done value in bits 3:0 */
+ if ((txd[desc_to_clean_to].cmd_type_offset_bsz & rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE)) {
+ /* Descriptor not yet processed by hardware */
+ return -1;
+ }
+
+ /* Figure out how many descriptors will be cleaned */
+ if (last_desc_cleaned > desc_to_clean_to)
+ nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) + desc_to_clean_to);
+ else
+ nb_tx_to_clean = (uint16_t)(desc_to_clean_to - last_desc_cleaned);
+
+ /* The last descriptor to clean is done, so that means all the
+ * descriptors from the last descriptor that was cleaned
+ * up to the last descriptor with the RS bit set
+ * are done. Only reset the threshold descriptor.
+ */
+ txd[desc_to_clean_to].cmd_type_offset_bsz = 0;
+
+ /* Update the txq to reflect the last descriptor that was cleaned */
+ txq->last_desc_cleaned = desc_to_clean_to;
+ txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
+
+ return 0;
+}
+
+#endif /* _COMMON_INTEL_TX_SCALAR_FNS_H_ */
--
2.51.0
* [RFC PATCH 06/27] net/intel: add common fn to calculate needed descriptors
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (4 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 05/27] net/intel: create separate header for Tx scalar fns Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 07/27] net/ice: refactor context descriptor handling Bruce Richardson
` (20 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev
Cc: Bruce Richardson, Vladimir Medvedkin, Anatoly Burakov,
Jingjing Wu, Praveen Shetty
Multiple drivers used the same logic to calculate how many Tx data
descriptors were needed, so move that calculation to common code. In the
process of updating the drivers, also fix the idpf driver's calculation
for the TSO case.
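As an illustration, the common helper counts descriptors per segment
rather than trusting nb_segs, since a TSO segment's data_len can exceed
CI_MAX_DATA_PER_TXD. A minimal sketch of the intended call site (as in
the driver hunks below, with the surrounding ring bookkeeping omitted):

        if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
                /* round up each segment against the per-descriptor limit */
                nb_used = (uint16_t)(ci_calc_pkt_desc(tx_pkt) + nb_ctx);
        else
                nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);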
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx_scalar_fns.h | 21 +++++++++++++++++++++
drivers/net/intel/i40e/i40e_rxtx.c | 18 +-----------------
drivers/net/intel/iavf/iavf_rxtx.c | 17 +----------------
drivers/net/intel/ice/ice_rxtx.c | 18 +-----------------
drivers/net/intel/idpf/idpf_common_rxtx.c | 21 +++++++++++++++++----
5 files changed, 41 insertions(+), 54 deletions(-)
diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
index c79210d084..f894cea616 100644
--- a/drivers/net/intel/common/tx_scalar_fns.h
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -64,4 +64,25 @@ ci_tx_xmit_cleanup(struct ci_tx_queue *txq)
return 0;
}
+static inline uint16_t
+ci_div_roundup16(uint16_t x, uint16_t y)
+{
+ return (uint16_t)((x + y - 1) / y);
+}
+
+/* Calculate the number of TX descriptors needed for each pkt */
+static inline uint16_t
+ci_calc_pkt_desc(const struct rte_mbuf *tx_pkt)
+{
+ uint16_t count = 0;
+
+ while (tx_pkt != NULL) {
+ count += ci_div_roundup16(tx_pkt->data_len, CI_MAX_DATA_PER_TXD);
+ tx_pkt = tx_pkt->next;
+ }
+
+ return count;
+}
+
+
#endif /* _COMMON_INTEL_TX_SCALAR_FNS_H_ */
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index 892069372f..886be06a89 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -1025,21 +1025,6 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union ci_tx_offload tx_offload)
return ctx_desc;
}
-/* Calculate the number of TX descriptors needed for each pkt */
-static inline uint16_t
-i40e_calc_pkt_desc(struct rte_mbuf *tx_pkt)
-{
- struct rte_mbuf *txd = tx_pkt;
- uint16_t count = 0;
-
- while (txd != NULL) {
- count += DIV_ROUND_UP(txd->data_len, CI_MAX_DATA_PER_TXD);
- txd = txd->next;
- }
-
- return count;
-}
-
uint16_t
i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
@@ -1102,8 +1087,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
* per tx desc.
*/
if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
- nb_used = (uint16_t)(i40e_calc_pkt_desc(tx_pkt) +
- nb_ctx);
+ nb_used = (uint16_t)(ci_calc_pkt_desc(tx_pkt) + nb_ctx);
else
nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
tx_last = (uint16_t)(tx_id + nb_used - 1);
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index 9946e112e8..ecf954a2c2 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -2667,21 +2667,6 @@ iavf_build_data_desc_cmd_offset_fields(volatile uint64_t *qw1,
((uint64_t)l2tag1 << IAVF_TXD_DATA_QW1_L2TAG1_SHIFT));
}
-/* Calculate the number of TX descriptors needed for each pkt */
-static inline uint16_t
-iavf_calc_pkt_desc(struct rte_mbuf *tx_pkt)
-{
- struct rte_mbuf *txd = tx_pkt;
- uint16_t count = 0;
-
- while (txd != NULL) {
- count += (txd->data_len + CI_MAX_DATA_PER_TXD - 1) / CI_MAX_DATA_PER_TXD;
- txd = txd->next;
- }
-
- return count;
-}
-
static inline void
iavf_fill_data_desc(volatile struct ci_tx_desc *desc,
uint64_t desc_template, uint16_t buffsz,
@@ -2767,7 +2752,7 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
* per tx desc.
*/
if (mb->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
- nb_desc_required = iavf_calc_pkt_desc(mb) + nb_desc_ctx + nb_desc_ipsec;
+ nb_desc_required = ci_calc_pkt_desc(mb) + nb_desc_ctx + nb_desc_ipsec;
else
nb_desc_required = nb_desc_data + nb_desc_ctx + nb_desc_ipsec;
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 5864238092..c2a38b1a13 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -3067,21 +3067,6 @@ ice_set_tso_ctx(struct rte_mbuf *mbuf, union ci_tx_offload tx_offload)
return ctx_desc;
}
-/* Calculate the number of TX descriptors needed for each pkt */
-static inline uint16_t
-ice_calc_pkt_desc(struct rte_mbuf *tx_pkt)
-{
- struct rte_mbuf *txd = tx_pkt;
- uint16_t count = 0;
-
- while (txd != NULL) {
- count += DIV_ROUND_UP(txd->data_len, CI_MAX_DATA_PER_TXD);
- txd = txd->next;
- }
-
- return count;
-}
-
uint16_t
ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
@@ -3144,8 +3129,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
* per tx desc.
*/
if (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG))
- nb_used = (uint16_t)(ice_calc_pkt_desc(tx_pkt) +
- nb_ctx);
+ nb_used = (uint16_t)(ci_calc_pkt_desc(tx_pkt) + nb_ctx);
else
nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
tx_last = (uint16_t)(tx_id + nb_used - 1);
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.c b/drivers/net/intel/idpf/idpf_common_rxtx.c
index 587871b54a..11d6848430 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.c
@@ -934,7 +934,16 @@ idpf_dp_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_offload.tso_segsz = tx_pkt->tso_segsz;
/* Calculate the number of context descriptors needed. */
nb_ctx = idpf_calc_context_desc(ol_flags);
- nb_used = tx_pkt->nb_segs + nb_ctx;
+
+ /* Calculate the number of TX descriptors needed for
+ * each packet. For TSO packets, use ci_calc_pkt_desc as
+ * the mbuf data size might exceed max data size that hw allows
+ * per tx desc.
+ */
+ if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
+ nb_used = ci_calc_pkt_desc(tx_pkt) + nb_ctx;
+ else
+ nb_used = tx_pkt->nb_segs + nb_ctx;
if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
cmd_dtype = IDPF_TXD_FLEX_FLOW_CMD_CS_EN;
@@ -1382,10 +1391,14 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
nb_ctx = idpf_calc_context_desc(ol_flags);
/* The number of descriptors that must be allocated for
- * a packet equals to the number of the segments of that
- * packet plus 1 context descriptor if needed.
+ * a packet. For TSO packets, use ci_calc_pkt_desc as
+ * the mbuf data size might exceed max data size that hw allows
+ * per tx desc.
*/
- nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
+ if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
+ nb_used = (uint16_t)(ci_calc_pkt_desc(tx_pkt) + nb_ctx);
+ else
+ nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
tx_last = (uint16_t)(tx_id + nb_used - 1);
/* Circular ring */
--
2.51.0
* [RFC PATCH 07/27] net/ice: refactor context descriptor handling
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (5 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 06/27] net/intel: add common fn to calculate needed descriptors Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 08/27] net/i40e: " Bruce Richardson
` (19 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson, Anatoly Burakov
Create a single function to manage all context descriptor handling. It
returns 0 or 1 depending on whether a context descriptor is needed, and,
when one is needed, it also returns the descriptor contents directly.
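The resulting call pattern in the driver is roughly the following (a
condensed sketch of the hunks below; sw_ring handling and ring
wrap-around are omitted):

        uint64_t cd_qw0, cd_qw1;
        uint32_t td_offset = 0;
        uint16_t nb_ctx;

        /* returns 0 or 1; when 1, the two quadwords hold the context descriptor */
        nb_ctx = get_context_desc(ol_flags, tx_pkt, &tx_offload, txq,
                        &td_offset, &cd_qw0, &cd_qw1);
        if (nb_ctx) {
                uint64_t *ctx_txd = RTE_CAST_PTR(uint64_t *, &ci_tx_ring[tx_id]);

                ctx_txd[0] = cd_qw0;
                ctx_txd[1] = cd_qw1;
        }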
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/ice/ice_rxtx.c | 96 ++++++++++++++++++--------------
1 file changed, 55 insertions(+), 41 deletions(-)
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index c2a38b1a13..b90a1b4ec4 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -3044,7 +3044,7 @@ ice_calc_context_desc(uint64_t flags)
/* set ice TSO context descriptor */
static inline uint64_t
-ice_set_tso_ctx(struct rte_mbuf *mbuf, union ci_tx_offload tx_offload)
+ice_set_tso_ctx(uint64_t ol_flags, const struct rte_mbuf *mbuf, union ci_tx_offload tx_offload)
{
uint64_t ctx_desc = 0;
uint32_t cd_cmd, hdr_len, cd_tso_len;
@@ -3055,7 +3055,7 @@ ice_set_tso_ctx(struct rte_mbuf *mbuf, union ci_tx_offload tx_offload)
}
hdr_len = tx_offload.l2_len + tx_offload.l3_len + tx_offload.l4_len;
- hdr_len += (mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) ?
+ hdr_len += (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) ?
tx_offload.outer_l2_len + tx_offload.outer_l3_len : 0;
cd_cmd = CI_TX_CTX_DESC_TSO;
@@ -3067,6 +3067,51 @@ ice_set_tso_ctx(struct rte_mbuf *mbuf, union ci_tx_offload tx_offload)
return ctx_desc;
}
+/* compute a context descriptor if one is necessary based on the ol_flags
+ *
+ * Returns 0 if no descriptor is necessary.
+ * Returns 1 if one is necessary and the contents of the descriptor are returned
+ * in the values pointed to by qw0 and qw1. td_offset may also be modified.
+ */
+static __rte_always_inline uint16_t
+get_context_desc(uint64_t ol_flags, const struct rte_mbuf *tx_pkt,
+ const union ci_tx_offload *tx_offload, const struct ci_tx_queue *txq,
+ uint32_t *td_offset, uint64_t *qw0, uint64_t *qw1)
+{
+ uint16_t cd_l2tag2 = 0;
+ uint64_t cd_type_cmd_tso_mss = ICE_TX_DESC_DTYPE_CTX;
+ uint32_t cd_tunneling_params = 0;
+ uint64_t ptp_tx_index = txq->ice_vsi->adapter->ptp_tx_index;
+
+ if (ice_calc_context_desc(ol_flags) == 0)
+ return 0;
+
+ if (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+ *td_offset |= (tx_offload->outer_l2_len >> 1) << CI_TX_DESC_LEN_MACLEN_S;
+ ice_parse_tunneling_params(ol_flags, *tx_offload, &cd_tunneling_params);
+ }
+
+ if (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG))
+ cd_type_cmd_tso_mss |= ice_set_tso_ctx(ol_flags, tx_pkt, *tx_offload);
+ else if (ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST)
+ cd_type_cmd_tso_mss |=
+ ((uint64_t)CI_TX_CTX_DESC_TSYN << CI_TXD_QW1_CMD_S) |
+ ((ptp_tx_index << ICE_TXD_CTX_QW1_TSYN_S) & ICE_TXD_CTX_QW1_TSYN_M);
+
+
+ /* TX context descriptor based double VLAN insert */
+ if (ol_flags & RTE_MBUF_F_TX_QINQ) {
+ cd_l2tag2 = tx_pkt->vlan_tci_outer;
+ cd_type_cmd_tso_mss |= ((uint64_t)CI_TX_CTX_DESC_IL2TAG2 << CI_TXD_QW1_CMD_S);
+ }
+
+ *qw0 = rte_cpu_to_le_32(cd_tunneling_params) |
+ ((uint64_t)rte_cpu_to_le_16(cd_l2tag2) << 32);
+ *qw1 = rte_cpu_to_le_64(cd_type_cmd_tso_mss);
+
+ return 1;
+}
+
uint16_t
ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
@@ -3077,7 +3122,6 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
struct ci_tx_entry *txe, *txn;
struct rte_mbuf *tx_pkt;
struct rte_mbuf *m_seg;
- uint32_t cd_tunneling_params;
uint16_t tx_id;
uint16_t ts_id = -1;
uint16_t nb_tx;
@@ -3106,20 +3150,24 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
(void)ci_tx_xmit_cleanup(txq);
for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+ uint64_t cd_qw0, cd_qw1;
tx_pkt = *tx_pkts++;
td_cmd = 0;
td_tag = 0;
td_offset = 0;
ol_flags = tx_pkt->ol_flags;
+
tx_offload.l2_len = tx_pkt->l2_len;
tx_offload.l3_len = tx_pkt->l3_len;
tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
tx_offload.l4_len = tx_pkt->l4_len;
tx_offload.tso_segsz = tx_pkt->tso_segsz;
+
/* Calculate the number of context descriptors needed. */
- nb_ctx = ice_calc_context_desc(ol_flags);
+ nb_ctx = get_context_desc(ol_flags, tx_pkt, &tx_offload,
+ txq, &td_offset, &cd_qw0, &cd_qw1);
/* The number of descriptors that must be allocated for
* a packet equals to the number of the segments of that
@@ -3161,15 +3209,6 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
td_tag = tx_pkt->vlan_tci;
}
- /* Fill in tunneling parameters if necessary */
- cd_tunneling_params = 0;
- if (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
- td_offset |= (tx_offload.outer_l2_len >> 1)
- << CI_TX_DESC_LEN_MACLEN_S;
- ice_parse_tunneling_params(ol_flags, tx_offload,
- &cd_tunneling_params);
- }
-
/* Enable checksum offloading */
if (ol_flags & ICE_TX_CKSUM_OFFLOAD_MASK)
ice_txd_enable_checksum(ol_flags, &td_cmd,
@@ -3177,11 +3216,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
if (nb_ctx) {
/* Setup TX context descriptor if required */
- volatile struct ice_tx_ctx_desc *ctx_txd =
- (volatile struct ice_tx_ctx_desc *)
- &ci_tx_ring[tx_id];
- uint16_t cd_l2tag2 = 0;
- uint64_t cd_type_cmd_tso_mss = ICE_TX_DESC_DTYPE_CTX;
+ uint64_t *ctx_txd = RTE_CAST_PTR(uint64_t *, &ci_tx_ring[tx_id]);
txn = &sw_ring[txe->next_id];
RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
@@ -3190,29 +3225,8 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
txe->mbuf = NULL;
}
- if (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG))
- cd_type_cmd_tso_mss |=
- ice_set_tso_ctx(tx_pkt, tx_offload);
- else if (ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST)
- cd_type_cmd_tso_mss |=
- ((uint64_t)CI_TX_CTX_DESC_TSYN <<
- CI_TXD_QW1_CMD_S) |
- (((uint64_t)txq->ice_vsi->adapter->ptp_tx_index <<
- ICE_TXD_CTX_QW1_TSYN_S) & ICE_TXD_CTX_QW1_TSYN_M);
-
- ctx_txd->tunneling_params =
- rte_cpu_to_le_32(cd_tunneling_params);
-
- /* TX context descriptor based double VLAN insert */
- if (ol_flags & RTE_MBUF_F_TX_QINQ) {
- cd_l2tag2 = tx_pkt->vlan_tci_outer;
- cd_type_cmd_tso_mss |=
- ((uint64_t)CI_TX_CTX_DESC_IL2TAG2 <<
- CI_TXD_QW1_CMD_S);
- }
- ctx_txd->l2tag2 = rte_cpu_to_le_16(cd_l2tag2);
- ctx_txd->qw1 =
- rte_cpu_to_le_64(cd_type_cmd_tso_mss);
+ ctx_txd[0] = cd_qw0;
+ ctx_txd[1] = cd_qw1;
txe->last_id = tx_last;
tx_id = txe->next_id;
--
2.51.0
* [RFC PATCH 08/27] net/i40e: refactor context descriptor handling
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (6 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 07/27] net/ice: refactor context descriptor handling Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 09/27] net/idpf: " Bruce Richardson
` (18 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson
Move all context descriptor handling to a single function, as with the
ice driver, and use the same function signature as that driver.
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/i40e/i40e_rxtx.c | 109 +++++++++++++++--------------
1 file changed, 58 insertions(+), 51 deletions(-)
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index 886be06a89..82c4c6017b 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -1000,7 +1000,7 @@ i40e_calc_context_desc(uint64_t flags)
/* set i40e TSO context descriptor */
static inline uint64_t
-i40e_set_tso_ctx(struct rte_mbuf *mbuf, union ci_tx_offload tx_offload)
+i40e_set_tso_ctx(uint64_t ol_flags, const struct rte_mbuf *mbuf, union ci_tx_offload tx_offload)
{
uint64_t ctx_desc = 0;
uint32_t cd_cmd, hdr_len, cd_tso_len;
@@ -1011,7 +1011,7 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union ci_tx_offload tx_offload)
}
hdr_len = tx_offload.l2_len + tx_offload.l3_len + tx_offload.l4_len;
- hdr_len += (mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) ?
+ hdr_len += (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) ?
tx_offload.outer_l2_len + tx_offload.outer_l3_len : 0;
cd_cmd = I40E_TX_CTX_DESC_TSO;
@@ -1025,6 +1025,53 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union ci_tx_offload tx_offload)
return ctx_desc;
}
+/* compute a context descriptor if one is necessary based on the ol_flags
+ *
+ * Returns 0 if no descriptor is necessary.
+ * Returns 1 if one is necessary and the contents of the descriptor are returned
+ * in the values pointed to by qw0 and qw1. td_offset may also be modified.
+ */
+static __rte_always_inline uint16_t
+get_context_desc(uint64_t ol_flags, const struct rte_mbuf *tx_pkt,
+ const union ci_tx_offload *tx_offload,
+ const struct ci_tx_queue *txq __rte_unused,
+ uint32_t *td_offset, uint64_t *qw0, uint64_t *qw1)
+{
+ uint16_t cd_l2tag2 = 0;
+ uint64_t cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
+ uint32_t cd_tunneling_params = 0;
+
+ if (i40e_calc_context_desc(ol_flags) == 0)
+ return 0;
+
+ if (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+ *td_offset |= (tx_offload->outer_l2_len >> 1) << CI_TX_DESC_LEN_MACLEN_S;
+ i40e_parse_tunneling_params(ol_flags, *tx_offload, &cd_tunneling_params);
+ }
+
+ if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
+ cd_type_cmd_tso_mss |= i40e_set_tso_ctx(ol_flags, tx_pkt, *tx_offload);
+ else {
+#ifdef RTE_LIBRTE_IEEE1588
+ if (ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST)
+ cd_type_cmd_tso_mss |=
+ ((uint64_t)I40E_TX_CTX_DESC_TSYN << I40E_TXD_CTX_QW1_CMD_SHIFT);
+#endif
+ }
+
+ /* TX context descriptor based double VLAN insert */
+ if (ol_flags & RTE_MBUF_F_TX_QINQ) {
+ cd_l2tag2 = tx_pkt->vlan_tci_outer;
+ cd_type_cmd_tso_mss |= ((uint64_t)I40E_TX_CTX_DESC_IL2TAG2 << I40E_TXD_CTX_QW1_CMD_SHIFT);
+ }
+
+ *qw0 = rte_cpu_to_le_32(cd_tunneling_params) |
+ ((uint64_t)rte_cpu_to_le_16(cd_l2tag2) << 32);
+ *qw1 = rte_cpu_to_le_64(cd_type_cmd_tso_mss);
+
+ return 1;
+}
+
uint16_t
i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
@@ -1035,7 +1082,6 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
volatile struct ci_tx_desc *txr;
struct rte_mbuf *tx_pkt;
struct rte_mbuf *m_seg;
- uint32_t cd_tunneling_params;
uint16_t tx_id;
uint16_t nb_tx;
uint32_t td_cmd;
@@ -1076,7 +1122,9 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
tx_offload.tso_segsz = tx_pkt->tso_segsz;
/* Calculate the number of context descriptors needed. */
- nb_ctx = i40e_calc_context_desc(ol_flags);
+ uint64_t cd_qw0 = 0, cd_qw1 = 0;
+ nb_ctx = get_context_desc(ol_flags, tx_pkt, &tx_offload, txq, &td_offset,
+ &cd_qw0, &cd_qw1);
/**
* The number of descriptors that must be allocated for
@@ -1122,14 +1170,6 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
/* Always enable CRC offload insertion */
td_cmd |= CI_TX_DESC_CMD_ICRC;
- /* Fill in tunneling parameters if necessary */
- cd_tunneling_params = 0;
- if (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
- td_offset |= (tx_offload.outer_l2_len >> 1)
- << CI_TX_DESC_LEN_MACLEN_S;
- i40e_parse_tunneling_params(ol_flags, tx_offload,
- &cd_tunneling_params);
- }
/* Enable checksum offloading */
if (ol_flags & I40E_TX_CKSUM_OFFLOAD_MASK)
i40e_txd_enable_checksum(ol_flags, &td_cmd,
@@ -1137,12 +1177,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
if (nb_ctx) {
/* Setup TX context descriptor if required */
- volatile struct i40e_tx_context_desc *ctx_txd =
- (volatile struct i40e_tx_context_desc *)\
- &txr[tx_id];
- uint16_t cd_l2tag2 = 0;
- uint64_t cd_type_cmd_tso_mss =
- I40E_TX_DESC_DTYPE_CONTEXT;
+ uint64_t *desc = RTE_CAST_PTR(uint64_t *, &txr[tx_id]);
txn = &sw_ring[txe->next_id];
RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
@@ -1151,41 +1186,13 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
txe->mbuf = NULL;
}
- /* TSO enabled means no timestamp */
- if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
- cd_type_cmd_tso_mss |=
- i40e_set_tso_ctx(tx_pkt, tx_offload);
- else {
-#ifdef RTE_LIBRTE_IEEE1588
- if (ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST)
- cd_type_cmd_tso_mss |=
- ((uint64_t)I40E_TX_CTX_DESC_TSYN <<
- I40E_TXD_CTX_QW1_CMD_SHIFT);
-#endif
- }
-
- ctx_txd->tunneling_params =
- rte_cpu_to_le_32(cd_tunneling_params);
- if (ol_flags & RTE_MBUF_F_TX_QINQ) {
- cd_l2tag2 = tx_pkt->vlan_tci_outer;
- cd_type_cmd_tso_mss |=
- ((uint64_t)I40E_TX_CTX_DESC_IL2TAG2 <<
- I40E_TXD_CTX_QW1_CMD_SHIFT);
- }
- ctx_txd->l2tag2 = rte_cpu_to_le_16(cd_l2tag2);
- ctx_txd->type_cmd_tso_mss =
- rte_cpu_to_le_64(cd_type_cmd_tso_mss);
+ desc[0] = cd_qw0;
+ desc[1] = cd_qw1;
PMD_TX_LOG(DEBUG, "mbuf: %p, TCD[%u]: "
- "tunneling_params: %#x; "
- "l2tag2: %#hx; "
- "rsvd: %#hx; "
- "type_cmd_tso_mss: %#"PRIx64";",
- tx_pkt, tx_id,
- ctx_txd->tunneling_params,
- ctx_txd->l2tag2,
- ctx_txd->rsvd,
- ctx_txd->type_cmd_tso_mss);
+ "qw0: %#"PRIx64"; "
+ "qw1: %#"PRIx64";",
+ tx_pkt, tx_id, cd_qw0, cd_qw1);
txe->last_id = tx_last;
tx_id = txe->next_id;
--
2.51.0
* [RFC PATCH 09/27] net/idpf: refactor context descriptor handling
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (7 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 08/27] net/i40e: " Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 10/27] net/intel: consolidate checksum mask definition Bruce Richardson
` (17 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson, Jingjing Wu, Praveen Shetty
Move all context descriptor handling to a single function, as was done
for the ice driver.
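For reference, the open-coded quadword packing below keeps the field
order of the tso.qw0/qw1 structure writes it replaces; in outline (layout
as implied by the old structure fields, not restated from a datasheet):

        /* qw0: flex_tlen (bits 0-31) | mss_rt (bits 32-47) | hdr_len (bits 48-55)
         * qw1: cmd_dtype (bits 0-15)
         */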
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/idpf/idpf_common_rxtx.c | 61 +++++++++++------------
1 file changed, 28 insertions(+), 33 deletions(-)
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.c b/drivers/net/intel/idpf/idpf_common_rxtx.c
index 11d6848430..9219ad9047 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.c
@@ -845,37 +845,36 @@ idpf_calc_context_desc(uint64_t flags)
return 0;
}
-/* set TSO context descriptor
+/* set TSO context descriptor, returns 0 if no context needed, 1 if context set
*/
-static inline void
-idpf_set_splitq_tso_ctx(struct rte_mbuf *mbuf,
+static inline uint16_t
+idpf_set_tso_ctx(uint64_t ol_flags, struct rte_mbuf *mbuf,
union ci_tx_offload tx_offload,
- volatile union idpf_flex_tx_ctx_desc *ctx_desc)
+ uint64_t *qw0, uint64_t *qw1)
{
- uint16_t cmd_dtype;
+ uint16_t cmd_dtype = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX | IDPF_TX_FLEX_CTX_DESC_CMD_TSO;
+ uint16_t tso_segsz = mbuf->tso_segsz;
uint32_t tso_len;
uint8_t hdr_len;
+ if (idpf_calc_context_desc(ol_flags) == 0)
+ return 0;
+
+ /* TSO context descriptor setup */
if (tx_offload.l4_len == 0) {
TX_LOG(DEBUG, "L4 length set to 0");
- return;
+ return 0;
}
- hdr_len = tx_offload.l2_len +
- tx_offload.l3_len +
- tx_offload.l4_len;
- cmd_dtype = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX |
- IDPF_TX_FLEX_CTX_DESC_CMD_TSO;
+ hdr_len = tx_offload.l2_len + tx_offload.l3_len + tx_offload.l4_len;
tso_len = mbuf->pkt_len - hdr_len;
- ctx_desc->tso.qw1.cmd_dtype = rte_cpu_to_le_16(cmd_dtype);
- ctx_desc->tso.qw0.hdr_len = hdr_len;
- ctx_desc->tso.qw0.mss_rt =
- rte_cpu_to_le_16((uint16_t)mbuf->tso_segsz &
- IDPF_TXD_FLEX_CTX_MSS_RT_M);
- ctx_desc->tso.qw0.flex_tlen =
- rte_cpu_to_le_32(tso_len &
- IDPF_TXD_FLEX_CTX_MSS_RT_M);
+ *qw0 = rte_cpu_to_le_32(tso_len & IDPF_TXD_FLEX_CTX_MSS_RT_M) |
+ ((uint64_t)rte_cpu_to_le_16(tso_segsz & IDPF_TXD_FLEX_CTX_MSS_RT_M) << 32) |
+ ((uint64_t)hdr_len << 48);
+ *qw1 = rte_cpu_to_le_16(cmd_dtype);
+
+ return 1;
}
RTE_EXPORT_INTERNAL_SYMBOL(idpf_dp_splitq_xmit_pkts)
@@ -933,7 +932,8 @@ idpf_dp_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_offload.l4_len = tx_pkt->l4_len;
tx_offload.tso_segsz = tx_pkt->tso_segsz;
/* Calculate the number of context descriptors needed. */
- nb_ctx = idpf_calc_context_desc(ol_flags);
+ uint64_t cd_qw0, cd_qw1;
+ nb_ctx = idpf_set_tso_ctx(ol_flags, tx_pkt, tx_offload, &cd_qw0, &cd_qw1);
/* Calculate the number of TX descriptors needed for
* each packet. For TSO packets, use ci_calc_pkt_desc as
@@ -950,12 +950,10 @@ idpf_dp_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
/* context descriptor */
if (nb_ctx != 0) {
- volatile union idpf_flex_tx_ctx_desc *ctx_desc =
- (volatile union idpf_flex_tx_ctx_desc *)&txr[tx_id];
+ uint64_t *ctx_desc = RTE_CAST_PTR(uint64_t *, &txr[tx_id]);
- if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
- idpf_set_splitq_tso_ctx(tx_pkt, tx_offload,
- ctx_desc);
+ ctx_desc[0] = cd_qw0;
+ ctx_desc[1] = cd_qw1;
tx_id++;
if (tx_id == txq->nb_tx_desc)
@@ -1388,7 +1386,8 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_offload.l4_len = tx_pkt->l4_len;
tx_offload.tso_segsz = tx_pkt->tso_segsz;
/* Calculate the number of context descriptors needed. */
- nb_ctx = idpf_calc_context_desc(ol_flags);
+ uint64_t cd_qw0, cd_qw1;
+ nb_ctx = idpf_set_tso_ctx(ol_flags, tx_pkt, tx_offload, &cd_qw0, &cd_qw1);
/* The number of descriptors that must be allocated for
* a packet. For TSO packets, use ci_calc_pkt_desc as
@@ -1431,9 +1430,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
if (nb_ctx != 0) {
/* Setup TX context descriptor if required */
- volatile union idpf_flex_tx_ctx_desc *ctx_txd =
- (volatile union idpf_flex_tx_ctx_desc *)
- &txr[tx_id];
+ uint64_t *ctx_txd = RTE_CAST_PTR(uint64_t *, &txr[tx_id]);
txn = &sw_ring[txe->next_id];
RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
@@ -1442,10 +1439,8 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
txe->mbuf = NULL;
}
- /* TSO enabled */
- if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
- idpf_set_splitq_tso_ctx(tx_pkt, tx_offload,
- ctx_txd);
+ ctx_txd[0] = cd_qw0;
+ ctx_txd[1] = cd_qw1;
txe->last_id = tx_last;
tx_id = txe->next_id;
--
2.51.0
* [RFC PATCH 10/27] net/intel: consolidate checksum mask definition
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (8 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 09/27] net/idpf: " Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 11/27] net/intel: create common checksum Tx offload function Bruce Richardson
` (16 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev
Cc: Bruce Richardson, Vladimir Medvedkin, Anatoly Burakov,
Jingjing Wu, Praveen Shetty
Create a common definition for checksum masks across iavf, idpf, i40e
and ice drivers.
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx.h | 7 +++++++
drivers/net/intel/i40e/i40e_rxtx.c | 7 +------
drivers/net/intel/iavf/iavf_rxtx.c | 2 +-
drivers/net/intel/iavf/iavf_rxtx.h | 8 --------
drivers/net/intel/ice/ice_rxtx.c | 7 +------
drivers/net/intel/idpf/idpf_common_rxtx.c | 4 ++--
drivers/net/intel/idpf/idpf_common_rxtx.h | 7 +------
7 files changed, 13 insertions(+), 29 deletions(-)
diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index 320ab0b8e0..a71b98f119 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -53,6 +53,13 @@
/* Common maximum data per TX descriptor */
#define CI_MAX_DATA_PER_TXD (CI_TXD_QW1_TX_BUF_SZ_M >> CI_TXD_QW1_TX_BUF_SZ_S)
+/* Checksum offload mask to identify packets requesting offload */
+#define CI_TX_CKSUM_OFFLOAD_MASK (RTE_MBUF_F_TX_IP_CKSUM | \
+ RTE_MBUF_F_TX_L4_MASK | \
+ RTE_MBUF_F_TX_TCP_SEG | \
+ RTE_MBUF_F_TX_OUTER_IP_CKSUM | \
+ RTE_MBUF_F_TX_OUTER_UDP_CKSUM)
+
/**
* Common TX offload union for Intel drivers.
* Supports both basic offloads (l2_len, l3_len, l4_len, tso_segsz) and
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index 82c4c6017b..e1964eab97 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -49,11 +49,6 @@
#define I40E_TX_IEEE1588_TMST 0
#endif
-#define I40E_TX_CKSUM_OFFLOAD_MASK (RTE_MBUF_F_TX_IP_CKSUM | \
- RTE_MBUF_F_TX_L4_MASK | \
- RTE_MBUF_F_TX_TCP_SEG | \
- RTE_MBUF_F_TX_OUTER_IP_CKSUM)
-
#define I40E_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_OUTER_IPV4 | \
RTE_MBUF_F_TX_OUTER_IPV6 | \
RTE_MBUF_F_TX_IPV4 | \
@@ -1171,7 +1166,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
td_cmd |= CI_TX_DESC_CMD_ICRC;
/* Enable checksum offloading */
- if (ol_flags & I40E_TX_CKSUM_OFFLOAD_MASK)
+ if (ol_flags & CI_TX_CKSUM_OFFLOAD_MASK)
i40e_txd_enable_checksum(ol_flags, &td_cmd,
&td_offset, tx_offload);
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index ecf954a2c2..9ce978e69c 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -2597,7 +2597,7 @@ iavf_build_data_desc_cmd_offset_fields(volatile uint64_t *qw1,
}
if ((m->ol_flags &
- (IAVF_TX_CKSUM_OFFLOAD_MASK | RTE_MBUF_F_TX_SEC_OFFLOAD)) == 0)
+ (CI_TX_CKSUM_OFFLOAD_MASK | RTE_MBUF_F_TX_SEC_OFFLOAD)) == 0)
goto skip_cksum;
/* Set MACLEN */
diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
index 86281aa965..4080184b3b 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.h
+++ b/drivers/net/intel/iavf/iavf_rxtx.h
@@ -136,14 +136,6 @@
#define IAVF_TX_MIN_PKT_LEN 17
-#define IAVF_TX_CKSUM_OFFLOAD_MASK ( \
- RTE_MBUF_F_TX_IP_CKSUM | \
- RTE_MBUF_F_TX_L4_MASK | \
- RTE_MBUF_F_TX_TCP_SEG | \
- RTE_MBUF_F_TX_UDP_SEG | \
- RTE_MBUF_F_TX_OUTER_IP_CKSUM | \
- RTE_MBUF_F_TX_OUTER_UDP_CKSUM)
-
#define IAVF_TX_OFFLOAD_MASK ( \
RTE_MBUF_F_TX_OUTER_IPV6 | \
RTE_MBUF_F_TX_OUTER_IPV4 | \
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index b90a1b4ec4..e102eb9bcc 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -9,11 +9,6 @@
#include "ice_rxtx.h"
#include "ice_rxtx_vec_common.h"
-#define ICE_TX_CKSUM_OFFLOAD_MASK (RTE_MBUF_F_TX_IP_CKSUM | \
- RTE_MBUF_F_TX_L4_MASK | \
- RTE_MBUF_F_TX_TCP_SEG | \
- RTE_MBUF_F_TX_UDP_SEG | \
- RTE_MBUF_F_TX_OUTER_IP_CKSUM)
/**
* The mbuf dynamic field pointer for protocol extraction metadata.
@@ -3210,7 +3205,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
}
/* Enable checksum offloading */
- if (ol_flags & ICE_TX_CKSUM_OFFLOAD_MASK)
+ if (ol_flags & CI_TX_CKSUM_OFFLOAD_MASK)
ice_txd_enable_checksum(ol_flags, &td_cmd,
&td_offset, tx_offload);
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.c b/drivers/net/intel/idpf/idpf_common_rxtx.c
index 9219ad9047..b34d545a0a 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.c
@@ -945,7 +945,7 @@ idpf_dp_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
else
nb_used = tx_pkt->nb_segs + nb_ctx;
- if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
+ if (ol_flags & CI_TX_CKSUM_OFFLOAD_MASK)
cmd_dtype = IDPF_TXD_FLEX_FLOW_CMD_CS_EN;
/* context descriptor */
@@ -1425,7 +1425,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
}
}
- if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
+ if (ol_flags & CI_TX_CKSUM_OFFLOAD_MASK)
td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
if (nb_ctx != 0) {
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.h b/drivers/net/intel/idpf/idpf_common_rxtx.h
index b88a87402d..fe7094d434 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.h
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.h
@@ -39,13 +39,8 @@
#define IDPF_RLAN_CTX_DBUF_S 7
#define IDPF_RX_MAX_DATA_BUF_SIZE (16 * 1024 - 128)
-#define IDPF_TX_CKSUM_OFFLOAD_MASK ( \
- RTE_MBUF_F_TX_IP_CKSUM | \
- RTE_MBUF_F_TX_L4_MASK | \
- RTE_MBUF_F_TX_TCP_SEG)
-
#define IDPF_TX_OFFLOAD_MASK ( \
- IDPF_TX_CKSUM_OFFLOAD_MASK | \
+ CI_TX_CKSUM_OFFLOAD_MASK | \
RTE_MBUF_F_TX_IPV4 | \
RTE_MBUF_F_TX_IPV6)
--
2.51.0
* [RFC PATCH 11/27] net/intel: create common checksum Tx offload function
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (9 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 10/27] net/intel: consolidate checksum mask definition Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 12/27] net/intel: create a common scalar Tx function Bruce Richardson
` (15 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson, Anatoly Burakov
Since i40e and ice have the same checksum offload logic, merge their
functions into one. Future rework should enable more drivers to use
this as well.
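Both drivers end up with the same call site; a minimal sketch (matching
the ice and i40e hunks below):

        uint32_t td_cmd = 0, td_offset = 0;

        if (ol_flags & CI_TX_CKSUM_OFFLOAD_MASK)
                ci_txd_enable_checksum(ol_flags, &td_cmd, &td_offset, tx_offload);

        /* td_cmd and td_offset then feed the data descriptor's
         * cmd_type_offset_bsz field.
         */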
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx_scalar_fns.h | 63 +++++++++++++++++++++++
drivers/net/intel/i40e/i40e_rxtx.c | 57 +--------------------
drivers/net/intel/ice/ice_rxtx.c | 64 +-----------------------
3 files changed, 65 insertions(+), 119 deletions(-)
diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
index f894cea616..95ee7dc35f 100644
--- a/drivers/net/intel/common/tx_scalar_fns.h
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -64,6 +64,69 @@ ci_tx_xmit_cleanup(struct ci_tx_queue *txq)
return 0;
}
+/* Common checksum enable function for Intel drivers (ice, i40e, etc.) */
+static inline void
+ci_txd_enable_checksum(uint64_t ol_flags,
+ uint32_t *td_cmd,
+ uint32_t *td_offset,
+ union ci_tx_offload tx_offload)
+{
+ /* Set MACLEN */
+ if (!(ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK))
+ *td_offset |= (tx_offload.l2_len >> 1)
+ << CI_TX_DESC_LEN_MACLEN_S;
+
+ /* Enable L3 checksum offloads */
+ if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
+ *td_cmd |= CI_TX_DESC_CMD_IIPT_IPV4_CSUM;
+ *td_offset |= (tx_offload.l3_len >> 2) <<
+ CI_TX_DESC_LEN_IPLEN_S;
+ } else if (ol_flags & RTE_MBUF_F_TX_IPV4) {
+ *td_cmd |= CI_TX_DESC_CMD_IIPT_IPV4;
+ *td_offset |= (tx_offload.l3_len >> 2) <<
+ CI_TX_DESC_LEN_IPLEN_S;
+ } else if (ol_flags & RTE_MBUF_F_TX_IPV6) {
+ *td_cmd |= CI_TX_DESC_CMD_IIPT_IPV6;
+ *td_offset |= (tx_offload.l3_len >> 2) <<
+ CI_TX_DESC_LEN_IPLEN_S;
+ }
+
+ if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+ *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_TCP;
+ *td_offset |= (tx_offload.l4_len >> 2) <<
+ CI_TX_DESC_LEN_L4_LEN_S;
+ return;
+ }
+
+ if (ol_flags & RTE_MBUF_F_TX_UDP_SEG) {
+ *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_UDP;
+ *td_offset |= (tx_offload.l4_len >> 2) <<
+ CI_TX_DESC_LEN_L4_LEN_S;
+ return;
+ }
+
+ /* Enable L4 checksum offloads */
+ switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
+ case RTE_MBUF_F_TX_TCP_CKSUM:
+ *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_TCP;
+ *td_offset |= (sizeof(struct rte_tcp_hdr) >> 2) <<
+ CI_TX_DESC_LEN_L4_LEN_S;
+ break;
+ case RTE_MBUF_F_TX_SCTP_CKSUM:
+ *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_SCTP;
+ *td_offset |= (sizeof(struct rte_sctp_hdr) >> 2) <<
+ CI_TX_DESC_LEN_L4_LEN_S;
+ break;
+ case RTE_MBUF_F_TX_UDP_CKSUM:
+ *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_UDP;
+ *td_offset |= (sizeof(struct rte_udp_hdr) >> 2) <<
+ CI_TX_DESC_LEN_L4_LEN_S;
+ break;
+ default:
+ break;
+ }
+}
+
static inline uint16_t
ci_div_roundup16(uint16_t x, uint16_t y)
{
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index e1964eab97..5d1b2e4217 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -306,61 +306,6 @@ i40e_parse_tunneling_params(uint64_t ol_flags,
*cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
}
-static inline void
-i40e_txd_enable_checksum(uint64_t ol_flags,
- uint32_t *td_cmd,
- uint32_t *td_offset,
- union ci_tx_offload tx_offload)
-{
- /* Set MACLEN */
- if (!(ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK))
- *td_offset |= (tx_offload.l2_len >> 1)
- << CI_TX_DESC_LEN_MACLEN_S;
-
- /* Enable L3 checksum offloads */
- if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
- *td_cmd |= CI_TX_DESC_CMD_IIPT_IPV4_CSUM;
- *td_offset |= (tx_offload.l3_len >> 2)
- << CI_TX_DESC_LEN_IPLEN_S;
- } else if (ol_flags & RTE_MBUF_F_TX_IPV4) {
- *td_cmd |= CI_TX_DESC_CMD_IIPT_IPV4;
- *td_offset |= (tx_offload.l3_len >> 2)
- << CI_TX_DESC_LEN_IPLEN_S;
- } else if (ol_flags & RTE_MBUF_F_TX_IPV6) {
- *td_cmd |= CI_TX_DESC_CMD_IIPT_IPV6;
- *td_offset |= (tx_offload.l3_len >> 2)
- << CI_TX_DESC_LEN_IPLEN_S;
- }
-
- if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
- *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_TCP;
- *td_offset |= (tx_offload.l4_len >> 2)
- << CI_TX_DESC_LEN_L4_LEN_S;
- return;
- }
-
- /* Enable L4 checksum offloads */
- switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
- case RTE_MBUF_F_TX_TCP_CKSUM:
- *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_TCP;
- *td_offset |= (sizeof(struct rte_tcp_hdr) >> 2) <<
- CI_TX_DESC_LEN_L4_LEN_S;
- break;
- case RTE_MBUF_F_TX_SCTP_CKSUM:
- *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_SCTP;
- *td_offset |= (sizeof(struct rte_sctp_hdr) >> 2) <<
- CI_TX_DESC_LEN_L4_LEN_S;
- break;
- case RTE_MBUF_F_TX_UDP_CKSUM:
- *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_UDP;
- *td_offset |= (sizeof(struct rte_udp_hdr) >> 2) <<
- CI_TX_DESC_LEN_L4_LEN_S;
- break;
- default:
- break;
- }
-}
-
/* Construct the tx flags */
static inline uint64_t
i40e_build_ctob(uint32_t td_cmd,
@@ -1167,7 +1112,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
/* Enable checksum offloading */
if (ol_flags & CI_TX_CKSUM_OFFLOAD_MASK)
- i40e_txd_enable_checksum(ol_flags, &td_cmd,
+ ci_txd_enable_checksum(ol_flags, &td_cmd,
&td_offset, tx_offload);
if (nb_ctx) {
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index e102eb9bcc..0b0179e1fa 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -2947,68 +2947,6 @@ ice_parse_tunneling_params(uint64_t ol_flags,
*cd_tunneling |= ICE_TXD_CTX_QW0_L4T_CS_M;
}
-static inline void
-ice_txd_enable_checksum(uint64_t ol_flags,
- uint32_t *td_cmd,
- uint32_t *td_offset,
- union ci_tx_offload tx_offload)
-{
- /* Set MACLEN */
- if (!(ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK))
- *td_offset |= (tx_offload.l2_len >> 1)
- << CI_TX_DESC_LEN_MACLEN_S;
-
- /* Enable L3 checksum offloads */
- if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
- *td_cmd |= CI_TX_DESC_CMD_IIPT_IPV4_CSUM;
- *td_offset |= (tx_offload.l3_len >> 2) <<
- CI_TX_DESC_LEN_IPLEN_S;
- } else if (ol_flags & RTE_MBUF_F_TX_IPV4) {
- *td_cmd |= CI_TX_DESC_CMD_IIPT_IPV4;
- *td_offset |= (tx_offload.l3_len >> 2) <<
- CI_TX_DESC_LEN_IPLEN_S;
- } else if (ol_flags & RTE_MBUF_F_TX_IPV6) {
- *td_cmd |= CI_TX_DESC_CMD_IIPT_IPV6;
- *td_offset |= (tx_offload.l3_len >> 2) <<
- CI_TX_DESC_LEN_IPLEN_S;
- }
-
- if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
- *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_TCP;
- *td_offset |= (tx_offload.l4_len >> 2) <<
- CI_TX_DESC_LEN_L4_LEN_S;
- return;
- }
-
- if (ol_flags & RTE_MBUF_F_TX_UDP_SEG) {
- *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_UDP;
- *td_offset |= (tx_offload.l4_len >> 2) <<
- CI_TX_DESC_LEN_L4_LEN_S;
- return;
- }
-
- /* Enable L4 checksum offloads */
- switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
- case RTE_MBUF_F_TX_TCP_CKSUM:
- *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_TCP;
- *td_offset |= (sizeof(struct rte_tcp_hdr) >> 2) <<
- CI_TX_DESC_LEN_L4_LEN_S;
- break;
- case RTE_MBUF_F_TX_SCTP_CKSUM:
- *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_SCTP;
- *td_offset |= (sizeof(struct rte_sctp_hdr) >> 2) <<
- CI_TX_DESC_LEN_L4_LEN_S;
- break;
- case RTE_MBUF_F_TX_UDP_CKSUM:
- *td_cmd |= CI_TX_DESC_CMD_L4T_EOFT_UDP;
- *td_offset |= (sizeof(struct rte_udp_hdr) >> 2) <<
- CI_TX_DESC_LEN_L4_LEN_S;
- break;
- default:
- break;
- }
-}
-
/* Construct the tx flags */
static inline uint64_t
ice_build_ctob(uint32_t td_cmd,
@@ -3206,7 +3144,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
/* Enable checksum offloading */
if (ol_flags & CI_TX_CKSUM_OFFLOAD_MASK)
- ice_txd_enable_checksum(ol_flags, &td_cmd,
+ ci_txd_enable_checksum(ol_flags, &td_cmd,
&td_offset, tx_offload);
if (nb_ctx) {
--
2.51.0
* [RFC PATCH 12/27] net/intel: create a common scalar Tx function
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (10 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 11/27] net/intel: create common checksum Tx offload function Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 13/27] net/i40e: use " Bruce Richardson
` (14 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson, Anatoly Burakov
Given the similarities between the transmit functions across the various
Intel drivers, make a start on consolidating them by moving the ice Tx
function into the common code, for reuse by other drivers.
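The expectation is that each driver becomes a thin wrapper around the
common function. This is a hypothetical sketch only; the real wiring is
done in the follow-on patches, and passing NULL for the timestamp-queue
ops assumes a driver without a Tx timestamp queue:

        uint16_t
        xyz_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
        {
                struct ci_tx_queue *txq = tx_queue;

                /* get_context_desc is the driver's own callback from the
                 * earlier refactoring patches in this series.
                 */
                return ci_xmit_pkts(txq, tx_pkts, nb_pkts, get_context_desc, NULL);
        }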
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx_scalar_fns.h | 215 ++++++++++++++++++
drivers/net/intel/ice/ice_rxtx.c | 268 +++++------------------
2 files changed, 267 insertions(+), 216 deletions(-)
diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
index 95ee7dc35f..70b22f1da0 100644
--- a/drivers/net/intel/common/tx_scalar_fns.h
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -6,6 +6,7 @@
#define _COMMON_INTEL_TX_SCALAR_FNS_H_
#include <stdint.h>
+#include <rte_io.h>
#include <rte_byteorder.h>
/* depends on common Tx definitions. */
@@ -147,5 +148,219 @@ ci_calc_pkt_desc(const struct rte_mbuf *tx_pkt)
return count;
}
+typedef uint16_t (*ci_get_ctx_desc_fn)(uint64_t ol_flags, const struct rte_mbuf *mbuf,
+ const union ci_tx_offload *tx_offload, const struct ci_tx_queue *txq,
+ uint32_t *td_offset, uint64_t *qw0, uint64_t *qw1);
+
+/* gets current timestamp tail index */
+typedef uint16_t (*get_ts_tail_t)(struct ci_tx_queue *txq);
+/* writes a timestamp descriptor and returns new tail index */
+typedef uint16_t (*write_ts_desc_t)(struct ci_tx_queue *txq, struct rte_mbuf *mbuf,
+ uint16_t tx_id, uint16_t ts_id);
+/* writes a timestamp tail index - doorbell */
+typedef void (*write_ts_tail_t)(struct ci_tx_queue *txq, uint16_t ts_id);
+
+struct ci_timesstamp_queue_fns {
+ get_ts_tail_t get_ts_tail;
+ write_ts_desc_t write_ts_desc;
+ write_ts_tail_t write_ts_tail;
+};
+
+static inline uint16_t
+ci_xmit_pkts(struct ci_tx_queue *txq,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts,
+ ci_get_ctx_desc_fn get_ctx_desc,
+ const struct ci_timesstamp_queue_fns *ts_fns)
+{
+ volatile struct ci_tx_desc *ci_tx_ring;
+ volatile struct ci_tx_desc *txd;
+ struct ci_tx_entry *sw_ring;
+ struct ci_tx_entry *txe, *txn;
+ struct rte_mbuf *tx_pkt;
+ struct rte_mbuf *m_seg;
+ uint16_t tx_id;
+ uint16_t ts_id = -1;
+ uint16_t nb_tx;
+ uint16_t nb_used;
+ uint16_t nb_ctx;
+ uint32_t td_cmd = 0;
+ uint32_t td_offset = 0;
+ uint32_t td_tag = 0;
+ uint16_t tx_last;
+ uint16_t slen;
+ uint64_t buf_dma_addr;
+ uint64_t ol_flags;
+ union ci_tx_offload tx_offload = {0};
+
+ sw_ring = txq->sw_ring;
+ ci_tx_ring = txq->ci_tx_ring;
+ tx_id = txq->tx_tail;
+ txe = &sw_ring[tx_id];
+
+ if (ts_fns != NULL)
+ ts_id = ts_fns->get_ts_tail(txq);
+
+ /* Check if the descriptor ring needs to be cleaned. */
+ if (txq->nb_tx_free < txq->tx_free_thresh)
+ (void)ci_tx_xmit_cleanup(txq);
+
+ for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+ uint64_t cd_qw0, cd_qw1;
+ tx_pkt = *tx_pkts++;
+
+ td_cmd = CI_TX_DESC_CMD_ICRC;
+ td_tag = 0;
+ td_offset = 0;
+ ol_flags = tx_pkt->ol_flags;
+
+ tx_offload.l2_len = tx_pkt->l2_len;
+ tx_offload.l3_len = tx_pkt->l3_len;
+ tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
+ tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
+ tx_offload.l4_len = tx_pkt->l4_len;
+ tx_offload.tso_segsz = tx_pkt->tso_segsz;
+
+ /* Calculate the number of context descriptors needed. */
+ nb_ctx = get_ctx_desc(ol_flags, tx_pkt, &tx_offload,
+ txq, &td_offset, &cd_qw0, &cd_qw1);
+
+ /* The number of descriptors that must be allocated for
+ * a packet equals to the number of the segments of that
+ * packet plus the number of context descriptor if needed.
+ * Recalculate the needed tx descs when TSO enabled in case
+ * the mbuf data size exceeds max data size that hw allows
+ * per tx desc.
+ */
+ if (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG))
+ nb_used = (uint16_t)(ci_calc_pkt_desc(tx_pkt) + nb_ctx);
+ else
+ nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
+ tx_last = (uint16_t)(tx_id + nb_used - 1);
+
+ /* Circular ring */
+ if (tx_last >= txq->nb_tx_desc)
+ tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
+
+ if (nb_used > txq->nb_tx_free) {
+ if (ci_tx_xmit_cleanup(txq) != 0) {
+ if (nb_tx == 0)
+ return 0;
+ goto end_of_tx;
+ }
+ if (unlikely(nb_used > txq->tx_rs_thresh)) {
+ while (nb_used > txq->nb_tx_free) {
+ if (ci_tx_xmit_cleanup(txq) != 0) {
+ if (nb_tx == 0)
+ return 0;
+ goto end_of_tx;
+ }
+ }
+ }
+ }
+
+ /* Descriptor based VLAN insertion */
+ if (ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
+ td_cmd |= CI_TX_DESC_CMD_IL2TAG1;
+ td_tag = tx_pkt->vlan_tci;
+ }
+
+ /* Enable checksum offloading */
+ if (ol_flags & CI_TX_CKSUM_OFFLOAD_MASK)
+ ci_txd_enable_checksum(ol_flags, &td_cmd,
+ &td_offset, tx_offload);
+
+ if (nb_ctx) {
+ /* Setup TX context descriptor if required */
+ uint64_t *ctx_txd = RTE_CAST_PTR(uint64_t *, &ci_tx_ring[tx_id]);
+
+ txn = &sw_ring[txe->next_id];
+ RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
+ if (txe->mbuf) {
+ rte_pktmbuf_free_seg(txe->mbuf);
+ txe->mbuf = NULL;
+ }
+
+ ctx_txd[0] = cd_qw0;
+ ctx_txd[1] = cd_qw1;
+
+ txe->last_id = tx_last;
+ tx_id = txe->next_id;
+ txe = txn;
+ }
+ m_seg = tx_pkt;
+
+ do {
+ txd = &ci_tx_ring[tx_id];
+ txn = &sw_ring[txe->next_id];
+
+ if (txe->mbuf)
+ rte_pktmbuf_free_seg(txe->mbuf);
+ txe->mbuf = m_seg;
+
+ /* Setup TX Descriptor */
+ slen = m_seg->data_len;
+ buf_dma_addr = rte_mbuf_data_iova(m_seg);
+
+ while ((ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) &&
+ unlikely(slen > CI_MAX_DATA_PER_TXD)) {
+ txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
+ txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
+ ((uint64_t)CI_MAX_DATA_PER_TXD << CI_TXD_QW1_TX_BUF_SZ_S) |
+ ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
+
+ buf_dma_addr += CI_MAX_DATA_PER_TXD;
+ slen -= CI_MAX_DATA_PER_TXD;
+
+ txe->last_id = tx_last;
+ tx_id = txe->next_id;
+ txe = txn;
+ txd = &ci_tx_ring[tx_id];
+ txn = &sw_ring[txe->next_id];
+ }
+
+ txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
+ txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
+ ((uint64_t)slen << CI_TXD_QW1_TX_BUF_SZ_S) |
+ ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
+
+ txe->last_id = tx_last;
+ tx_id = txe->next_id;
+ txe = txn;
+ m_seg = m_seg->next;
+ } while (m_seg);
+
+ /* fill the last descriptor with End of Packet (EOP) bit */
+ td_cmd |= CI_TX_DESC_CMD_EOP;
+ txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
+ txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
+
+ /* set RS bit on the last descriptor of one packet */
+ if (txq->nb_tx_used >= txq->tx_rs_thresh) {
+ td_cmd |= CI_TX_DESC_CMD_RS;
+
+ /* Update txq RS bit counters */
+ txq->nb_tx_used = 0;
+ }
+ txd->cmd_type_offset_bsz |=
+ rte_cpu_to_le_64(((uint64_t)td_cmd) << CI_TXD_QW1_CMD_S);
+
+ if (ts_fns != NULL)
+ ts_id = ts_fns->write_ts_desc(txq, tx_pkt, tx_id, ts_id);
+ }
+end_of_tx:
+ /* update Tail register */
+ if (ts_fns != NULL)
+ ts_fns->write_ts_tail(txq, ts_id);
+ else
+ rte_write32_wc(tx_id, txq->qtx_tail);
+ txq->tx_tail = tx_id;
+
+ return nb_tx;
+}
#endif /* _COMMON_INTEL_TX_SCALAR_FNS_H_ */
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 0b0179e1fa..384676cfc2 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -3045,228 +3045,64 @@ get_context_desc(uint64_t ol_flags, const struct rte_mbuf *tx_pkt,
return 1;
}
-uint16_t
-ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+static uint16_t
+ice_get_ts_tail(struct ci_tx_queue *txq)
{
- struct ci_tx_queue *txq;
- volatile struct ci_tx_desc *ci_tx_ring;
- volatile struct ci_tx_desc *txd;
- struct ci_tx_entry *sw_ring;
- struct ci_tx_entry *txe, *txn;
- struct rte_mbuf *tx_pkt;
- struct rte_mbuf *m_seg;
- uint16_t tx_id;
- uint16_t ts_id = -1;
- uint16_t nb_tx;
- uint16_t nb_used;
- uint16_t nb_ctx;
- uint32_t td_cmd = 0;
- uint32_t td_offset = 0;
- uint32_t td_tag = 0;
- uint16_t tx_last;
- uint16_t slen;
- uint64_t buf_dma_addr;
- uint64_t ol_flags;
- union ci_tx_offload tx_offload = {0};
-
- txq = tx_queue;
- sw_ring = txq->sw_ring;
- ci_tx_ring = txq->ci_tx_ring;
- tx_id = txq->tx_tail;
- txe = &sw_ring[tx_id];
-
- if (txq->tsq != NULL && txq->tsq->ts_flag > 0)
- ts_id = txq->tsq->ts_tail;
-
- /* Check if the descriptor ring needs to be cleaned. */
- if (txq->nb_tx_free < txq->tx_free_thresh)
- (void)ci_tx_xmit_cleanup(txq);
-
- for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
- uint64_t cd_qw0, cd_qw1;
- tx_pkt = *tx_pkts++;
-
- td_cmd = 0;
- td_tag = 0;
- td_offset = 0;
- ol_flags = tx_pkt->ol_flags;
-
- tx_offload.l2_len = tx_pkt->l2_len;
- tx_offload.l3_len = tx_pkt->l3_len;
- tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
- tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
- tx_offload.l4_len = tx_pkt->l4_len;
- tx_offload.tso_segsz = tx_pkt->tso_segsz;
-
- /* Calculate the number of context descriptors needed. */
- nb_ctx = get_context_desc(ol_flags, tx_pkt, &tx_offload,
- txq, &td_offset, &cd_qw0, &cd_qw1);
-
- /* The number of descriptors that must be allocated for
- * a packet equals to the number of the segments of that
- * packet plus the number of context descriptor if needed.
- * Recalculate the needed tx descs when TSO enabled in case
- * the mbuf data size exceeds max data size that hw allows
- * per tx desc.
- */
- if (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG))
- nb_used = (uint16_t)(ci_calc_pkt_desc(tx_pkt) + nb_ctx);
- else
- nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
- tx_last = (uint16_t)(tx_id + nb_used - 1);
-
- /* Circular ring */
- if (tx_last >= txq->nb_tx_desc)
- tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
-
- if (nb_used > txq->nb_tx_free) {
- if (ci_tx_xmit_cleanup(txq) != 0) {
- if (nb_tx == 0)
- return 0;
- goto end_of_tx;
- }
- if (unlikely(nb_used > txq->tx_rs_thresh)) {
- while (nb_used > txq->nb_tx_free) {
- if (ci_tx_xmit_cleanup(txq) != 0) {
- if (nb_tx == 0)
- return 0;
- goto end_of_tx;
- }
- }
- }
- }
-
- /* Descriptor based VLAN insertion */
- if (ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
- td_cmd |= CI_TX_DESC_CMD_IL2TAG1;
- td_tag = tx_pkt->vlan_tci;
- }
-
- /* Enable checksum offloading */
- if (ol_flags & CI_TX_CKSUM_OFFLOAD_MASK)
- ci_txd_enable_checksum(ol_flags, &td_cmd,
- &td_offset, tx_offload);
-
- if (nb_ctx) {
- /* Setup TX context descriptor if required */
- uint64_t *ctx_txd = RTE_CAST_PTR(uint64_t *, &ci_tx_ring[tx_id]);
-
- txn = &sw_ring[txe->next_id];
- RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
- if (txe->mbuf) {
- rte_pktmbuf_free_seg(txe->mbuf);
- txe->mbuf = NULL;
- }
-
- ctx_txd[0] = cd_qw0;
- ctx_txd[1] = cd_qw1;
-
- txe->last_id = tx_last;
- tx_id = txe->next_id;
- txe = txn;
- }
- m_seg = tx_pkt;
-
- do {
- txd = &ci_tx_ring[tx_id];
- txn = &sw_ring[txe->next_id];
-
- if (txe->mbuf)
- rte_pktmbuf_free_seg(txe->mbuf);
- txe->mbuf = m_seg;
-
- /* Setup TX Descriptor */
- slen = m_seg->data_len;
- buf_dma_addr = rte_mbuf_data_iova(m_seg);
-
- while ((ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) &&
- unlikely(slen > CI_MAX_DATA_PER_TXD)) {
- txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
- txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
- ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
- ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
- ((uint64_t)CI_MAX_DATA_PER_TXD << CI_TXD_QW1_TX_BUF_SZ_S) |
- ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
-
- buf_dma_addr += CI_MAX_DATA_PER_TXD;
- slen -= CI_MAX_DATA_PER_TXD;
-
- txe->last_id = tx_last;
- tx_id = txe->next_id;
- txe = txn;
- txd = &ci_tx_ring[tx_id];
- txn = &sw_ring[txe->next_id];
- }
-
- txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
- txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
- ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
- ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
- ((uint64_t)slen << CI_TXD_QW1_TX_BUF_SZ_S) |
- ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
-
- txe->last_id = tx_last;
- tx_id = txe->next_id;
- txe = txn;
- m_seg = m_seg->next;
- } while (m_seg);
+ return txq->tsq->ts_tail;
+}
- /* fill the last descriptor with End of Packet (EOP) bit */
- td_cmd |= CI_TX_DESC_CMD_EOP;
- txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
- txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
+static uint16_t
+ice_write_ts_desc(struct ci_tx_queue *txq,
+ struct rte_mbuf *tx_pkt,
+ uint16_t tx_id,
+ uint16_t ts_id)
+{
+ uint64_t txtime = *RTE_MBUF_DYNFIELD(tx_pkt, txq->tsq->ts_offset, uint64_t *);
+ uint32_t tstamp = (uint32_t)(txtime % NS_PER_S) >> ICE_TXTIME_CTX_RESOLUTION_128NS;
+ const uint32_t desc_tx_id = (tx_id == 0) ? txq->nb_tx_desc : tx_id;
+ __le32 ts_desc = rte_cpu_to_le_32(FIELD_PREP(ICE_TXTIME_TX_DESC_IDX_M, desc_tx_id) |
+ FIELD_PREP(ICE_TXTIME_STAMP_M, tstamp));
+
+ txq->tsq->ice_ts_ring[ts_id].tx_desc_idx_tstamp = ts_desc;
+ ts_id++;
+
+ /* To prevent an MDD, when wrapping the tstamp
+ * ring create additional TS descriptors equal
+ * to the number of the fetch TS descriptors
+ * value. HW will merge the TS descriptors with
+ * the same timestamp value into a single
+ * descriptor.
+ */
+ if (ts_id == txq->tsq->nb_ts_desc) {
+ uint16_t fetch = txq->tsq->nb_ts_desc - txq->nb_tx_desc;
+ ts_id = 0;
+ for (; ts_id < fetch; ts_id++)
+ txq->tsq->ice_ts_ring[ts_id].tx_desc_idx_tstamp = ts_desc;
+ }
+ return ts_id;
+}
- /* set RS bit on the last descriptor of one packet */
- if (txq->nb_tx_used >= txq->tx_rs_thresh) {
- PMD_TX_LOG(DEBUG,
- "Setting RS bit on TXD id="
- "%4u (port=%d queue=%d)",
- tx_last, txq->port_id, txq->queue_id);
+static void
+ice_write_ts_tail(struct ci_tx_queue *txq, uint16_t ts_tail)
+{
+ ICE_PCI_REG_WRITE(txq->qtx_tail, ts_tail);
+ txq->tsq->ts_tail = ts_tail;
+}
- td_cmd |= CI_TX_DESC_CMD_RS;
+uint16_t
+ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ const struct ci_timesstamp_queue_fns ts_fns = {
+ .get_ts_tail = ice_get_ts_tail,
+ .write_ts_desc = ice_write_ts_desc,
+ .write_ts_tail = ice_write_ts_tail,
+ };
+ struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
- /* Update txq RS bit counters */
- txq->nb_tx_used = 0;
- }
- txd->cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)td_cmd) << CI_TXD_QW1_CMD_S);
-
- if (txq->tsq != NULL && txq->tsq->ts_flag > 0) {
- uint64_t txtime = *RTE_MBUF_DYNFIELD(tx_pkt,
- txq->tsq->ts_offset, uint64_t *);
- uint32_t tstamp = (uint32_t)(txtime % NS_PER_S) >>
- ICE_TXTIME_CTX_RESOLUTION_128NS;
- const uint32_t desc_tx_id = (tx_id == 0) ? txq->nb_tx_desc : tx_id;
- __le32 ts_desc = rte_cpu_to_le_32(FIELD_PREP(ICE_TXTIME_TX_DESC_IDX_M,
- desc_tx_id) | FIELD_PREP(ICE_TXTIME_STAMP_M, tstamp));
- txq->tsq->ice_ts_ring[ts_id].tx_desc_idx_tstamp = ts_desc;
- ts_id++;
- /* To prevent an MDD, when wrapping the tstamp
- * ring create additional TS descriptors equal
- * to the number of the fetch TS descriptors
- * value. HW will merge the TS descriptors with
- * the same timestamp value into a single
- * descriptor.
- */
- if (ts_id == txq->tsq->nb_ts_desc) {
- uint16_t fetch = txq->tsq->nb_ts_desc - txq->nb_tx_desc;
- ts_id = 0;
- for (; ts_id < fetch; ts_id++)
- txq->tsq->ice_ts_ring[ts_id].tx_desc_idx_tstamp = ts_desc;
- }
- }
- }
-end_of_tx:
- /* update Tail register */
- if (txq->tsq != NULL && txq->tsq->ts_flag > 0) {
- ICE_PCI_REG_WRITE(txq->qtx_tail, ts_id);
- txq->tsq->ts_tail = ts_id;
- } else {
- ICE_PCI_REG_WRITE(txq->qtx_tail, tx_id);
- }
- txq->tx_tail = tx_id;
+ if (txq->tsq != NULL && txq->tsq->ts_flag > 0)
+ return ci_xmit_pkts(txq, tx_pkts, nb_pkts, get_context_desc, &ts_fns);
- return nb_tx;
+ return ci_xmit_pkts(txq, tx_pkts, nb_pkts, get_context_desc, NULL);
}
static __rte_always_inline int
--
2.51.0
* [RFC PATCH 13/27] net/i40e: use common scalar Tx function
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (11 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 12/27] net/intel: create a common scalar Tx function Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 14/27] net/intel: add IPSec hooks to common " Bruce Richardson
` (13 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson
Following the earlier rework, the scalar transmit function for i40e can
use the common function previously moved over from the ice driver. This
removes hundreds of lines of duplicated code.
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/i40e/i40e_rxtx.c | 206 +----------------------------
1 file changed, 2 insertions(+), 204 deletions(-)
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index 5d1b2e4217..ecec70e0ac 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -1015,210 +1015,8 @@ get_context_desc(uint64_t ol_flags, const struct rte_mbuf *tx_pkt,
uint16_t
i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
- struct ci_tx_queue *txq;
- struct ci_tx_entry *sw_ring;
- struct ci_tx_entry *txe, *txn;
- volatile struct ci_tx_desc *txd;
- volatile struct ci_tx_desc *txr;
- struct rte_mbuf *tx_pkt;
- struct rte_mbuf *m_seg;
- uint16_t tx_id;
- uint16_t nb_tx;
- uint32_t td_cmd;
- uint32_t td_offset;
- uint32_t td_tag;
- uint64_t ol_flags;
- uint16_t nb_used;
- uint16_t nb_ctx;
- uint16_t tx_last;
- uint16_t slen;
- uint64_t buf_dma_addr;
- union ci_tx_offload tx_offload = {0};
-
- txq = tx_queue;
- sw_ring = txq->sw_ring;
- txr = txq->ci_tx_ring;
- tx_id = txq->tx_tail;
- txe = &sw_ring[tx_id];
-
- /* Check if the descriptor ring needs to be cleaned. */
- if (txq->nb_tx_free < txq->tx_free_thresh)
- (void)ci_tx_xmit_cleanup(txq);
-
- for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
- td_cmd = 0;
- td_tag = 0;
- td_offset = 0;
-
- tx_pkt = *tx_pkts++;
- RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
-
- ol_flags = tx_pkt->ol_flags;
- tx_offload.l2_len = tx_pkt->l2_len;
- tx_offload.l3_len = tx_pkt->l3_len;
- tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
- tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
- tx_offload.l4_len = tx_pkt->l4_len;
- tx_offload.tso_segsz = tx_pkt->tso_segsz;
-
- /* Calculate the number of context descriptors needed. */
- uint64_t cd_qw0 = 0, cd_qw1 = 0;
- nb_ctx = get_context_desc(ol_flags, tx_pkt, &tx_offload, txq, &td_offset,
- &cd_qw0, &cd_qw1);
-
- /**
- * The number of descriptors that must be allocated for
- * a packet equals to the number of the segments of that
- * packet plus 1 context descriptor if needed.
- * Recalculate the needed tx descs when TSO enabled in case
- * the mbuf data size exceeds max data size that hw allows
- * per tx desc.
- */
- if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
- nb_used = (uint16_t)(ci_calc_pkt_desc(tx_pkt) + nb_ctx);
- else
- nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
- tx_last = (uint16_t)(tx_id + nb_used - 1);
-
- /* Circular ring */
- if (tx_last >= txq->nb_tx_desc)
- tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
-
- if (nb_used > txq->nb_tx_free) {
- if (ci_tx_xmit_cleanup(txq) != 0) {
- if (nb_tx == 0)
- return 0;
- goto end_of_tx;
- }
- if (unlikely(nb_used > txq->tx_rs_thresh)) {
- while (nb_used > txq->nb_tx_free) {
- if (ci_tx_xmit_cleanup(txq) != 0) {
- if (nb_tx == 0)
- return 0;
- goto end_of_tx;
- }
- }
- }
- }
-
- /* Descriptor based VLAN insertion */
- if (ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
- td_cmd |= CI_TX_DESC_CMD_IL2TAG1;
- td_tag = tx_pkt->vlan_tci;
- }
-
- /* Always enable CRC offload insertion */
- td_cmd |= CI_TX_DESC_CMD_ICRC;
-
- /* Enable checksum offloading */
- if (ol_flags & CI_TX_CKSUM_OFFLOAD_MASK)
- ci_txd_enable_checksum(ol_flags, &td_cmd,
- &td_offset, tx_offload);
-
- if (nb_ctx) {
- /* Setup TX context descriptor if required */
- uint64_t *desc = RTE_CAST_PTR(uint64_t *, &txr[tx_id]);
-
- txn = &sw_ring[txe->next_id];
- RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
- if (txe->mbuf != NULL) {
- rte_pktmbuf_free_seg(txe->mbuf);
- txe->mbuf = NULL;
- }
-
- desc[0] = cd_qw0;
- desc[1] = cd_qw1;
-
- PMD_TX_LOG(DEBUG, "mbuf: %p, TCD[%u]: "
- "qw0: %#"PRIx64"; "
- "qw1: %#"PRIx64";",
- tx_pkt, tx_id, cd_qw0, cd_qw1);
-
- txe->last_id = tx_last;
- tx_id = txe->next_id;
- txe = txn;
- }
-
- m_seg = tx_pkt;
- do {
- txd = &txr[tx_id];
- txn = &sw_ring[txe->next_id];
-
- if (txe->mbuf)
- rte_pktmbuf_free_seg(txe->mbuf);
- txe->mbuf = m_seg;
-
- /* Setup TX Descriptor */
- slen = m_seg->data_len;
- buf_dma_addr = rte_mbuf_data_iova(m_seg);
-
- while ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) &&
- unlikely(slen > CI_MAX_DATA_PER_TXD)) {
- txd->buffer_addr =
- rte_cpu_to_le_64(buf_dma_addr);
- txd->cmd_type_offset_bsz =
- i40e_build_ctob(td_cmd,
- td_offset, CI_MAX_DATA_PER_TXD,
- td_tag);
-
- buf_dma_addr += CI_MAX_DATA_PER_TXD;
- slen -= CI_MAX_DATA_PER_TXD;
-
- txe->last_id = tx_last;
- tx_id = txe->next_id;
- txe = txn;
- txd = &txr[tx_id];
- txn = &sw_ring[txe->next_id];
- }
- PMD_TX_LOG(DEBUG, "mbuf: %p, TDD[%u]: "
- "buf_dma_addr: %#"PRIx64"; "
- "td_cmd: %#x; "
- "td_offset: %#x; "
- "td_len: %u; "
- "td_tag: %#x;",
- tx_pkt, tx_id, buf_dma_addr,
- td_cmd, td_offset, slen, td_tag);
-
- txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
- txd->cmd_type_offset_bsz = i40e_build_ctob(td_cmd,
- td_offset, slen, td_tag);
- txe->last_id = tx_last;
- tx_id = txe->next_id;
- txe = txn;
- m_seg = m_seg->next;
- } while (m_seg != NULL);
-
- /* The last packet data descriptor needs End Of Packet (EOP) */
- td_cmd |= CI_TX_DESC_CMD_EOP;
- txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
- txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
-
- if (txq->nb_tx_used >= txq->tx_rs_thresh) {
- PMD_TX_LOG(DEBUG,
- "Setting RS bit on TXD id="
- "%4u (port=%d queue=%d)",
- tx_last, txq->port_id, txq->queue_id);
-
- td_cmd |= CI_TX_DESC_CMD_RS;
-
- /* Update txq RS bit counters */
- txq->nb_tx_used = 0;
- }
-
- txd->cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)td_cmd) << CI_TXD_QW1_CMD_S);
- }
-
-end_of_tx:
- PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
- (unsigned) txq->port_id, (unsigned) txq->queue_id,
- (unsigned) tx_id, (unsigned) nb_tx);
-
- rte_io_wmb();
- I40E_PCI_REG_WC_WRITE_RELAXED(txq->qtx_tail, tx_id);
- txq->tx_tail = tx_id;
-
- return nb_tx;
+ /* i40e does not support timestamp queues, so pass NULL for ts_fns */
+ return ci_xmit_pkts(tx_queue, tx_pkts, nb_pkts, get_context_desc, NULL);
}
static __rte_always_inline int
--
2.51.0
* [RFC PATCH 14/27] net/intel: add IPSec hooks to common Tx function
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (12 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 13/27] net/i40e: use " Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 15/27] net/intel: support configurable VLAN tag insertion on Tx Bruce Richardson
` (12 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson, Anatoly Burakov
The iavf driver supports IPSec offload on Tx, so add hooks to the
common Tx function to support it. Do so in a way that has zero
performance impact on drivers without IPSec support: they pass
compile-time NULL constants for the function pointers, so the compiler
can optimize the IPSec handling away.
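To illustrate why the NULL hooks cost nothing: ci_xmit_pkts() is a
static inline function, so a wrapper passing literal NULL constants lets
the compiler constant-fold the "ipsec_ops != NULL" checks and drop the
IPsec code paths entirely. A minimal sketch, reusing the hypothetical
"xyz" names from the earlier example (not real driver code):

uint16_t
xyz_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	/* both hooks are compile-time NULL constants, so after inlining
	 * the IPsec and timestamp branches are removed by the compiler.
	 */
	return ci_xmit_pkts(tx_queue, tx_pkts, nb_pkts, xyz_get_context_desc,
			NULL, NULL);
}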
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx_scalar_fns.h | 60 ++++++++++++++++++++++--
drivers/net/intel/i40e/i40e_rxtx.c | 4 +-
drivers/net/intel/ice/ice_rxtx.c | 4 +-
3 files changed, 60 insertions(+), 8 deletions(-)
diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
index 70b22f1da0..8c0de26537 100644
--- a/drivers/net/intel/common/tx_scalar_fns.h
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -152,6 +152,24 @@ typedef uint16_t (*ci_get_ctx_desc_fn)(uint64_t ol_flags, const struct rte_mbuf
const union ci_tx_offload *tx_offload, const struct ci_tx_queue *txq,
uint32_t *td_offset, uint64_t *qw0, uint64_t *qw1);
+/* gets IPsec descriptor information and returns number of descriptors needed (0 or 1) */
+typedef uint16_t (*get_ipsec_desc_t)(const struct rte_mbuf *mbuf,
+ const struct ci_tx_queue *txq,
+ void **ipsec_metadata,
+ uint64_t *qw0,
+ uint64_t *qw1);
+/* calculates segment length for IPsec + TSO combinations */
+typedef uint16_t (*calc_ipsec_segment_len_t)(const struct rte_mbuf *mb_seg,
+ uint64_t ol_flags,
+ const void *ipsec_metadata,
+ uint16_t tlen);
+
+/** IPsec descriptor operations for drivers that support inline IPsec crypto. */
+struct ci_ipsec_ops {
+ get_ipsec_desc_t get_ipsec_desc;
+ calc_ipsec_segment_len_t calc_segment_len;
+};
+
/* gets current timestamp tail index */
typedef uint16_t (*get_ts_tail_t)(struct ci_tx_queue *txq);
/* writes a timestamp descriptor and returns new tail index */
@@ -171,6 +189,7 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
struct rte_mbuf **tx_pkts,
uint16_t nb_pkts,
ci_get_ctx_desc_fn get_ctx_desc,
+ const struct ci_ipsec_ops *ipsec_ops,
const struct ci_timesstamp_queue_fns *ts_fns)
{
volatile struct ci_tx_desc *ci_tx_ring;
@@ -206,6 +225,9 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
(void)ci_tx_xmit_cleanup(txq);
for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+ void *ipsec_md = NULL;
+ uint16_t nb_ipsec = 0;
+ uint64_t ipsec_qw0 = 0, ipsec_qw1 = 0;
uint64_t cd_qw0, cd_qw1;
tx_pkt = *tx_pkts++;
@@ -225,17 +247,22 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
nb_ctx = get_ctx_desc(ol_flags, tx_pkt, &tx_offload,
txq, &td_offset, &cd_qw0, &cd_qw1);
+ /* Get IPsec descriptor information if IPsec ops provided */
+ if (ipsec_ops != NULL)
+ nb_ipsec = ipsec_ops->get_ipsec_desc(tx_pkt, txq, &ipsec_md,
+ &ipsec_qw0, &ipsec_qw1);
+
/* The number of descriptors that must be allocated for
* a packet equals to the number of the segments of that
- * packet plus the number of context descriptor if needed.
+ * packet plus the number of context and IPsec descriptors if needed.
* Recalculate the needed tx descs when TSO enabled in case
* the mbuf data size exceeds max data size that hw allows
* per tx desc.
*/
if (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG))
- nb_used = (uint16_t)(ci_calc_pkt_desc(tx_pkt) + nb_ctx);
+ nb_used = (uint16_t)(ci_calc_pkt_desc(tx_pkt) + nb_ctx + nb_ipsec);
else
- nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
+ nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx + nb_ipsec);
tx_last = (uint16_t)(tx_id + nb_used - 1);
/* Circular ring */
@@ -288,6 +315,26 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
tx_id = txe->next_id;
txe = txn;
}
+
+ if (ipsec_ops != NULL && nb_ipsec > 0) {
+ /* Setup TX IPsec descriptor if required */
+ uint64_t *ipsec_txd = RTE_CAST_PTR(uint64_t *, &ci_tx_ring[tx_id]);
+
+ txn = &sw_ring[txe->next_id];
+ RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
+ if (txe->mbuf) {
+ rte_pktmbuf_free_seg(txe->mbuf);
+ txe->mbuf = NULL;
+ }
+
+ ipsec_txd[0] = ipsec_qw0;
+ ipsec_txd[1] = ipsec_qw1;
+
+ txe->last_id = tx_last;
+ tx_id = txe->next_id;
+ txe = txn;
+ }
+
m_seg = tx_pkt;
do {
@@ -299,7 +346,12 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
txe->mbuf = m_seg;
/* Setup TX Descriptor */
- slen = m_seg->data_len;
+ /* Calculate segment length, using IPsec callback if provided */
+ if (ipsec_ops != NULL)
+ slen = ipsec_ops->calc_segment_len(m_seg, ol_flags, ipsec_md, 0);
+ else
+ slen = m_seg->data_len;
+
buf_dma_addr = rte_mbuf_data_iova(m_seg);
while ((ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) &&
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index ecec70e0ac..e22fcfff60 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -1015,8 +1015,8 @@ get_context_desc(uint64_t ol_flags, const struct rte_mbuf *tx_pkt,
uint16_t
i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
- /* i40e does not support timestamp queues, so pass NULL for ts_fns */
- return ci_xmit_pkts(tx_queue, tx_pkts, nb_pkts, get_context_desc, NULL);
+ /* i40e does not support IPsec or timestamp queues, so pass NULL for both */
+ return ci_xmit_pkts(tx_queue, tx_pkts, nb_pkts, get_context_desc, NULL, NULL);
}
static __rte_always_inline int
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 384676cfc2..49ed6b8399 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -3100,9 +3100,9 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
if (txq->tsq != NULL && txq->tsq->ts_flag > 0)
- return ci_xmit_pkts(txq, tx_pkts, nb_pkts, get_context_desc, &ts_fns);
+ return ci_xmit_pkts(txq, tx_pkts, nb_pkts, get_context_desc, NULL, &ts_fns);
- return ci_xmit_pkts(txq, tx_pkts, nb_pkts, get_context_desc, NULL);
+ return ci_xmit_pkts(txq, tx_pkts, nb_pkts, get_context_desc, NULL, NULL);
}
static __rte_always_inline int
--
2.51.0
* [RFC PATCH 15/27] net/intel: support configurable VLAN tag insertion on Tx
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (13 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 14/27] net/intel: add IPSec hooks to common " Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 16/27] net/iavf: use common scalar Tx function Bruce Richardson
` (11 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson, Anatoly Burakov
Make the VLAN tag insertion logic in the common code configurable, so
that each driver can specify where the inner and outer tags are placed.
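As a rough sketch of how a driver would select the field at run time
(hypothetical "xyz" names again; the real iavf conversion in a later
patch derives this from its vlan_flag setting):

static inline uint16_t
xyz_xmit_pkts_common(struct ci_tx_queue *txq, struct rte_mbuf **tx_pkts,
		uint16_t nb_pkts, bool vlan_in_l2tag2)
{
	/* translate the driver's negotiated tag location into the common enum */
	enum ci_tx_l2tag1_field tag_loc = vlan_in_l2tag2 ?
			CI_VLAN_IN_L2TAG2 : CI_VLAN_IN_L2TAG1;

	return ci_xmit_pkts(txq, tx_pkts, nb_pkts, tag_loc,
			xyz_get_context_desc, NULL, NULL);
}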
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx.h | 10 ++++++++++
drivers/net/intel/common/tx_scalar_fns.h | 9 +++++++--
drivers/net/intel/i40e/i40e_rxtx.c | 4 ++--
drivers/net/intel/ice/ice_rxtx.c | 4 ++--
4 files changed, 21 insertions(+), 6 deletions(-)
diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index a71b98f119..0d11daaab3 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -45,6 +45,16 @@
#define CI_TX_CTX_DESC_TSYN 0x02
#define CI_TX_CTX_DESC_IL2TAG2 0x04
+/**
+ * L2TAG1 Field Source Selection
+ * Specifies which mbuf VLAN field to use for the L2TAG1 field in data descriptors.
+ * Context descriptor VLAN handling (L2TAG2) is managed by driver-specific callbacks.
+ */
+enum ci_tx_l2tag1_field {
+ CI_VLAN_IN_L2TAG1, /**< For VLAN (not QinQ), use L2Tag1 field in data desc */
+ CI_VLAN_IN_L2TAG2, /**< For VLAN (not QinQ), use L2Tag2 field in ctx desc */
+};
+
/* Common TX Descriptor Length Field Shifts */
#define CI_TX_DESC_LEN_MACLEN_S 0 /* 7 BITS */
#define CI_TX_DESC_LEN_IPLEN_S 7 /* 7 BITS */
diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
index 8c0de26537..6079a558e4 100644
--- a/drivers/net/intel/common/tx_scalar_fns.h
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -188,6 +188,7 @@ static inline uint16_t
ci_xmit_pkts(struct ci_tx_queue *txq,
struct rte_mbuf **tx_pkts,
uint16_t nb_pkts,
+ enum ci_tx_l2tag1_field l2tag1_field,
ci_get_ctx_desc_fn get_ctx_desc,
const struct ci_ipsec_ops *ipsec_ops,
const struct ci_timesstamp_queue_fns *ts_fns)
@@ -286,8 +287,12 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
}
}
- /* Descriptor based VLAN insertion */
- if (ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
+ /* Descriptor based VLAN/QinQ insertion */
+ /* for single VLAN offload, only insert the tag in the data desc when CI_VLAN_IN_L2TAG1 is selected;
+ * for QinQ offload, the inner tag always goes in L2Tag1
+ */
+ if (((ol_flags & RTE_MBUF_F_TX_VLAN) && (l2tag1_field == CI_VLAN_IN_L2TAG1)) ||
+ (ol_flags & RTE_MBUF_F_TX_QINQ)) {
td_cmd |= CI_TX_DESC_CMD_IL2TAG1;
td_tag = tx_pkt->vlan_tci;
}
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index e22fcfff60..2d12e6dd1a 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -1002,7 +1002,7 @@ get_context_desc(uint64_t ol_flags, const struct rte_mbuf *tx_pkt,
/* TX context descriptor based double VLAN insert */
if (ol_flags & RTE_MBUF_F_TX_QINQ) {
cd_l2tag2 = tx_pkt->vlan_tci_outer;
- cd_type_cmd_tso_mss |= ((uint64_t)I40E_TX_CTX_DESC_IL2TAG2 << I40E_TXD_CTX_QW1_CMD_SHIFT);
+ cd_type_cmd_tso_mss |= (I40E_TX_CTX_DESC_IL2TAG2 << I40E_TXD_CTX_QW1_CMD_SHIFT);
}
*qw0 = rte_cpu_to_le_32(cd_tunneling_params) |
@@ -1016,7 +1016,7 @@ uint16_t
i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
/* i40e does not support IPsec or timestamp queues, so pass NULL for both */
- return ci_xmit_pkts(tx_queue, tx_pkts, nb_pkts, get_context_desc, NULL, NULL);
+ return ci_xmit_pkts(tx_queue, tx_pkts, nb_pkts, CI_VLAN_IN_L2TAG1, get_context_desc, NULL, NULL);
}
static __rte_always_inline int
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 49ed6b8399..2c73011181 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -3100,9 +3100,9 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
if (txq->tsq != NULL && txq->tsq->ts_flag > 0)
- return ci_xmit_pkts(txq, tx_pkts, nb_pkts, get_context_desc, NULL, &ts_fns);
+ return ci_xmit_pkts(txq, tx_pkts, nb_pkts, CI_VLAN_IN_L2TAG1, get_context_desc, NULL, &ts_fns);
- return ci_xmit_pkts(txq, tx_pkts, nb_pkts, get_context_desc, NULL, NULL);
+ return ci_xmit_pkts(txq, tx_pkts, nb_pkts, CI_VLAN_IN_L2TAG1, get_context_desc, NULL, NULL);
}
static __rte_always_inline int
--
2.51.0
* [RFC PATCH 16/27] net/iavf: use common scalar Tx function
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (14 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 15/27] net/intel: support configurable VLAN tag insertion on Tx Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 17/27] net/i40e: document requirement for QinQ support Bruce Richardson
` (10 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson, Vladimir Medvedkin
Now that the common scalar Tx function has all necessary hooks for the
features supported by the iavf driver, use the common function to avoid
duplicated code.
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/iavf/iavf_rxtx.c | 534 ++++++-----------------------
1 file changed, 109 insertions(+), 425 deletions(-)
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index 9ce978e69c..f96876ca46 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -2327,7 +2327,7 @@ iavf_recv_pkts_bulk_alloc(void *rx_queue,
/* Check if the context descriptor is needed for TX offloading */
static inline uint16_t
-iavf_calc_context_desc(struct rte_mbuf *mb, uint8_t vlan_flag)
+iavf_calc_context_desc(const struct rte_mbuf *mb, uint8_t vlan_flag)
{
uint64_t flags = mb->ol_flags;
if (flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG |
@@ -2345,44 +2345,7 @@ iavf_calc_context_desc(struct rte_mbuf *mb, uint8_t vlan_flag)
}
static inline void
-iavf_fill_ctx_desc_cmd_field(volatile uint64_t *field, struct rte_mbuf *m,
- uint8_t vlan_flag)
-{
- uint64_t cmd = 0;
-
- /* TSO enabled */
- if (m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG))
- cmd = CI_TX_CTX_DESC_TSO << IAVF_TXD_CTX_QW1_CMD_SHIFT;
-
- if ((m->ol_flags & RTE_MBUF_F_TX_VLAN &&
- vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) ||
- m->ol_flags & RTE_MBUF_F_TX_QINQ) {
- cmd |= CI_TX_CTX_DESC_IL2TAG2
- << IAVF_TXD_CTX_QW1_CMD_SHIFT;
- }
-
- if (IAVF_CHECK_TX_LLDP(m))
- cmd |= IAVF_TX_CTX_DESC_SWTCH_UPLINK
- << IAVF_TXD_CTX_QW1_CMD_SHIFT;
-
- *field |= cmd;
-}
-
-static inline void
-iavf_fill_ctx_desc_ipsec_field(volatile uint64_t *field,
- struct iavf_ipsec_crypto_pkt_metadata *ipsec_md)
-{
- uint64_t ipsec_field =
- (uint64_t)ipsec_md->ctx_desc_ipsec_params <<
- IAVF_TXD_CTX_QW1_IPSEC_PARAMS_CIPHERBLK_SHIFT;
-
- *field |= ipsec_field;
-}
-
-
-static inline void
-iavf_fill_ctx_desc_tunnelling_field(volatile uint64_t *qw0,
- const struct rte_mbuf *m)
+iavf_fill_ctx_desc_tunnelling_field(uint64_t *qw0, const struct rte_mbuf *m)
{
uint64_t eip_typ = IAVF_TX_CTX_DESC_EIPT_NONE;
uint64_t eip_len = 0;
@@ -2457,7 +2420,7 @@ iavf_fill_ctx_desc_tunnelling_field(volatile uint64_t *qw0,
static inline uint16_t
iavf_fill_ctx_desc_segmentation_field(volatile uint64_t *field,
- struct rte_mbuf *m, struct iavf_ipsec_crypto_pkt_metadata *ipsec_md)
+ const struct rte_mbuf *m, struct iavf_ipsec_crypto_pkt_metadata *ipsec_md)
{
uint64_t segmentation_field = 0;
uint64_t total_length = 0;
@@ -2496,59 +2459,31 @@ struct iavf_tx_context_desc_qws {
__le64 qw1;
};
-static inline void
-iavf_fill_context_desc(volatile struct iavf_tx_context_desc *desc,
- struct rte_mbuf *m, struct iavf_ipsec_crypto_pkt_metadata *ipsec_md,
- uint16_t *tlen, uint8_t vlan_flag)
+/* IPsec callback for ci_xmit_pkts - gets IPsec descriptor information */
+static uint16_t
+iavf_get_ipsec_desc(const struct rte_mbuf *mbuf, const struct ci_tx_queue *txq,
+ void **ipsec_metadata, uint64_t *qw0, uint64_t *qw1)
{
- volatile struct iavf_tx_context_desc_qws *desc_qws =
- (volatile struct iavf_tx_context_desc_qws *)desc;
- /* fill descriptor type field */
- desc_qws->qw1 = IAVF_TX_DESC_DTYPE_CONTEXT;
-
- /* fill command field */
- iavf_fill_ctx_desc_cmd_field(&desc_qws->qw1, m, vlan_flag);
-
- /* fill segmentation field */
- if (m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) {
- /* fill IPsec field */
- if (m->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD)
- iavf_fill_ctx_desc_ipsec_field(&desc_qws->qw1,
- ipsec_md);
-
- *tlen = iavf_fill_ctx_desc_segmentation_field(&desc_qws->qw1,
- m, ipsec_md);
- }
-
- /* fill tunnelling field */
- if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)
- iavf_fill_ctx_desc_tunnelling_field(&desc_qws->qw0, m);
- else
- desc_qws->qw0 = 0;
+ struct iavf_ipsec_crypto_pkt_metadata *md;
- desc_qws->qw0 = rte_cpu_to_le_64(desc_qws->qw0);
- desc_qws->qw1 = rte_cpu_to_le_64(desc_qws->qw1);
+ if (!(mbuf->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD))
+ return 0;
- /* vlan_flag specifies VLAN tag location for VLAN, and outer tag location for QinQ. */
- if (m->ol_flags & RTE_MBUF_F_TX_QINQ)
- desc->l2tag2 = vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2 ? m->vlan_tci_outer :
- m->vlan_tci;
- else if (m->ol_flags & RTE_MBUF_F_TX_VLAN && vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2)
- desc->l2tag2 = m->vlan_tci;
-}
+ md = RTE_MBUF_DYNFIELD(mbuf, txq->ipsec_crypto_pkt_md_offset,
+ struct iavf_ipsec_crypto_pkt_metadata *);
+ if (!md)
+ return 0;
+ *ipsec_metadata = md;
-static inline void
-iavf_fill_ipsec_desc(volatile struct iavf_tx_ipsec_desc *desc,
- const struct iavf_ipsec_crypto_pkt_metadata *md, uint16_t *ipsec_len)
-{
- desc->qw0 = rte_cpu_to_le_64(((uint64_t)md->l4_payload_len <<
+ /* Fill IPsec descriptor using existing logic */
+ *qw0 = rte_cpu_to_le_64(((uint64_t)md->l4_payload_len <<
IAVF_IPSEC_TX_DESC_QW0_L4PAYLEN_SHIFT) |
((uint64_t)md->esn << IAVF_IPSEC_TX_DESC_QW0_IPSECESN_SHIFT) |
((uint64_t)md->esp_trailer_len <<
IAVF_IPSEC_TX_DESC_QW0_TRAILERLEN_SHIFT));
- desc->qw1 = rte_cpu_to_le_64(((uint64_t)md->sa_idx <<
+ *qw1 = rte_cpu_to_le_64(((uint64_t)md->sa_idx <<
IAVF_IPSEC_TX_DESC_QW1_IPSECSA_SHIFT) |
((uint64_t)md->next_proto <<
IAVF_IPSEC_TX_DESC_QW1_IPSECNH_SHIFT) |
@@ -2557,143 +2492,106 @@ iavf_fill_ipsec_desc(volatile struct iavf_tx_ipsec_desc *desc,
((uint64_t)(md->ol_flags & IAVF_IPSEC_CRYPTO_OL_FLAGS_NATT ?
1ULL : 0ULL) <<
IAVF_IPSEC_TX_DESC_QW1_UDP_SHIFT) |
- (uint64_t)IAVF_TX_DESC_DTYPE_IPSEC);
+ ((uint64_t)IAVF_TX_DESC_DTYPE_IPSEC <<
+ CI_TXD_QW1_DTYPE_S));
- /**
- * TODO: Pre-calculate this in the Session initialization
- *
- * Calculate IPsec length required in data descriptor func when TSO
- * offload is enabled
- */
- *ipsec_len = sizeof(struct rte_esp_hdr) + (md->len_iv >> 2) +
- (md->ol_flags & IAVF_IPSEC_CRYPTO_OL_FLAGS_NATT ?
- sizeof(struct rte_udp_hdr) : 0);
+ return 1; /* One IPsec descriptor needed */
}
-static inline void
-iavf_build_data_desc_cmd_offset_fields(volatile uint64_t *qw1,
- struct rte_mbuf *m, uint8_t vlan_flag)
+/* IPsec callback for ci_xmit_pkts - calculates segment length for IPsec+TSO */
+static uint16_t
+iavf_calc_ipsec_segment_len(const struct rte_mbuf *mb_seg, uint64_t ol_flags,
+ const void *ipsec_metadata, uint16_t tlen)
{
- uint64_t command = 0;
- uint64_t offset = 0;
- uint64_t l2tag1 = 0;
-
- *qw1 = CI_TX_DESC_DTYPE_DATA;
-
- command = (uint64_t)CI_TX_DESC_CMD_ICRC;
-
- /* Descriptor based VLAN insertion */
- if ((vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1) &&
- m->ol_flags & RTE_MBUF_F_TX_VLAN) {
- command |= (uint64_t)CI_TX_DESC_CMD_IL2TAG1;
- l2tag1 |= m->vlan_tci;
- }
-
- /* Descriptor based QinQ insertion. vlan_flag specifies outer tag location. */
- if (m->ol_flags & RTE_MBUF_F_TX_QINQ) {
- command |= (uint64_t)CI_TX_DESC_CMD_IL2TAG1;
- l2tag1 = vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1 ? m->vlan_tci_outer :
- m->vlan_tci;
+ const struct iavf_ipsec_crypto_pkt_metadata *ipsec_md = ipsec_metadata;
+
+ if ((ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) &&
+ (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG))) {
+ uint16_t ipseclen = ipsec_md ? (ipsec_md->esp_trailer_len +
+ ipsec_md->len_iv) : 0;
+ uint16_t slen = tlen + mb_seg->l2_len + mb_seg->l3_len +
+ mb_seg->outer_l3_len + ipseclen;
+ if (ol_flags & RTE_MBUF_F_TX_L4_MASK)
+ slen += mb_seg->l4_len;
+ return slen;
}
- if ((m->ol_flags &
- (CI_TX_CKSUM_OFFLOAD_MASK | RTE_MBUF_F_TX_SEC_OFFLOAD)) == 0)
- goto skip_cksum;
+ return mb_seg->data_len;
+}
- /* Set MACLEN */
- if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK &&
- !(m->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD))
- offset |= (m->outer_l2_len >> 1)
- << CI_TX_DESC_LEN_MACLEN_S;
- else
- offset |= (m->l2_len >> 1)
- << CI_TX_DESC_LEN_MACLEN_S;
+/* Context descriptor callback for ci_xmit_pkts */
+static uint16_t
+iavf_get_context_desc(uint64_t ol_flags, const struct rte_mbuf *mbuf,
+ const union ci_tx_offload *tx_offload __rte_unused,
+ const struct ci_tx_queue *txq,
+ uint32_t *td_offset __rte_unused, uint64_t *qw0, uint64_t *qw1)
+{
+ uint8_t iavf_vlan_flag;
+ uint16_t cd_l2tag2 = 0;
+ uint64_t cd_type_cmd = IAVF_TX_DESC_DTYPE_CONTEXT;
+ uint64_t cd_tunneling_params = 0;
+ uint16_t tlen = 0;
+ struct iavf_ipsec_crypto_pkt_metadata *ipsec_md = NULL;
+
+ /* Use IAVF-specific vlan_flag from txq */
+ iavf_vlan_flag = txq->vlan_flag;
+
+ /* Check if context descriptor is needed using existing IAVF logic */
+ if (!iavf_calc_context_desc(mbuf, iavf_vlan_flag))
+ return 0;
- /* Enable L3 checksum offloading inner */
- if (m->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
- if (m->ol_flags & RTE_MBUF_F_TX_IPV4) {
- command |= CI_TX_DESC_CMD_IIPT_IPV4_CSUM;
- offset |= (m->l3_len >> 2) << CI_TX_DESC_LEN_IPLEN_S;
- }
- } else if (m->ol_flags & RTE_MBUF_F_TX_IPV4) {
- command |= CI_TX_DESC_CMD_IIPT_IPV4;
- offset |= (m->l3_len >> 2) << CI_TX_DESC_LEN_IPLEN_S;
- } else if (m->ol_flags & RTE_MBUF_F_TX_IPV6) {
- command |= CI_TX_DESC_CMD_IIPT_IPV6;
- offset |= (m->l3_len >> 2) << CI_TX_DESC_LEN_IPLEN_S;
+ /* Get IPsec metadata if needed */
+ if (ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) {
+ ipsec_md = RTE_MBUF_DYNFIELD(mbuf, txq->ipsec_crypto_pkt_md_offset,
+ struct iavf_ipsec_crypto_pkt_metadata *);
}
- if (m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) {
- if (m->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
- command |= CI_TX_DESC_CMD_L4T_EOFT_TCP;
- else
- command |= CI_TX_DESC_CMD_L4T_EOFT_UDP;
- offset |= (m->l4_len >> 2) <<
- CI_TX_DESC_LEN_L4_LEN_S;
+ /* TSO command field */
+ if (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) {
+ cd_type_cmd |= (uint64_t)CI_TX_CTX_DESC_TSO << IAVF_TXD_CTX_QW1_CMD_SHIFT;
- *qw1 = rte_cpu_to_le_64((((uint64_t)command <<
- IAVF_TXD_DATA_QW1_CMD_SHIFT) & IAVF_TXD_DATA_QW1_CMD_MASK) |
- (((uint64_t)offset << IAVF_TXD_DATA_QW1_OFFSET_SHIFT) &
- IAVF_TXD_DATA_QW1_OFFSET_MASK) |
- ((uint64_t)l2tag1 << IAVF_TXD_DATA_QW1_L2TAG1_SHIFT));
+ /* IPsec field for TSO */
+ if (ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD && ipsec_md) {
+ uint64_t ipsec_field = (uint64_t)ipsec_md->ctx_desc_ipsec_params <<
+ IAVF_TXD_CTX_QW1_IPSEC_PARAMS_CIPHERBLK_SHIFT;
+ cd_type_cmd |= ipsec_field;
+ }
- return;
+ /* TSO segmentation field */
+ tlen = iavf_fill_ctx_desc_segmentation_field(&cd_type_cmd,
+ mbuf, ipsec_md);
+ (void)tlen; /* Suppress unused variable warning */
}
- /* Enable L4 checksum offloads */
- switch (m->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
- case RTE_MBUF_F_TX_TCP_CKSUM:
- command |= CI_TX_DESC_CMD_L4T_EOFT_TCP;
- offset |= (sizeof(struct rte_tcp_hdr) >> 2) <<
- CI_TX_DESC_LEN_L4_LEN_S;
- break;
- case RTE_MBUF_F_TX_SCTP_CKSUM:
- command |= CI_TX_DESC_CMD_L4T_EOFT_SCTP;
- offset |= (sizeof(struct rte_sctp_hdr) >> 2) <<
- CI_TX_DESC_LEN_L4_LEN_S;
- break;
- case RTE_MBUF_F_TX_UDP_CKSUM:
- command |= CI_TX_DESC_CMD_L4T_EOFT_UDP;
- offset |= (sizeof(struct rte_udp_hdr) >> 2) <<
- CI_TX_DESC_LEN_L4_LEN_S;
- break;
+ /* VLAN field for L2TAG2 */
+ if ((ol_flags & RTE_MBUF_F_TX_VLAN &&
+ iavf_vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) ||
+ ol_flags & RTE_MBUF_F_TX_QINQ) {
+ cd_type_cmd |= (uint64_t)CI_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT;
}
-skip_cksum:
- *qw1 = rte_cpu_to_le_64((((uint64_t)command <<
- IAVF_TXD_DATA_QW1_CMD_SHIFT) & IAVF_TXD_DATA_QW1_CMD_MASK) |
- (((uint64_t)offset << IAVF_TXD_DATA_QW1_OFFSET_SHIFT) &
- IAVF_TXD_DATA_QW1_OFFSET_MASK) |
- ((uint64_t)l2tag1 << IAVF_TXD_DATA_QW1_L2TAG1_SHIFT));
-}
-
-static inline void
-iavf_fill_data_desc(volatile struct ci_tx_desc *desc,
- uint64_t desc_template, uint16_t buffsz,
- uint64_t buffer_addr)
-{
- /* fill data descriptor qw1 from template */
- desc->cmd_type_offset_bsz = desc_template;
-
- /* set data buffer size */
- desc->cmd_type_offset_bsz |=
- (((uint64_t)buffsz << IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT) &
- IAVF_TXD_DATA_QW1_TX_BUF_SZ_MASK);
-
- desc->buffer_addr = rte_cpu_to_le_64(buffer_addr);
- desc->cmd_type_offset_bsz = rte_cpu_to_le_64(desc->cmd_type_offset_bsz);
-}
-
+ /* LLDP switching field */
+ if (IAVF_CHECK_TX_LLDP(mbuf))
+ cd_type_cmd |= IAVF_TX_CTX_DESC_SWTCH_UPLINK << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+
+ /* Tunneling field */
+ if (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)
+ iavf_fill_ctx_desc_tunnelling_field((uint64_t *)&cd_tunneling_params, mbuf);
+
+ /* L2TAG2 field (VLAN) */
+ if (ol_flags & RTE_MBUF_F_TX_QINQ) {
+ cd_l2tag2 = iavf_vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2 ?
+ mbuf->vlan_tci_outer : mbuf->vlan_tci;
+ } else if (ol_flags & RTE_MBUF_F_TX_VLAN &&
+ iavf_vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
+ cd_l2tag2 = mbuf->vlan_tci;
+ }
-static struct iavf_ipsec_crypto_pkt_metadata *
-iavf_ipsec_crypto_get_pkt_metadata(const struct ci_tx_queue *txq,
- struct rte_mbuf *m)
-{
- if (m->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD)
- return RTE_MBUF_DYNFIELD(m, txq->ipsec_crypto_pkt_md_offset,
- struct iavf_ipsec_crypto_pkt_metadata *);
+ /* Set outputs */
+ *qw0 = rte_cpu_to_le_64(cd_tunneling_params | ((uint64_t)cd_l2tag2 << 32));
+ *qw1 = rte_cpu_to_le_64(cd_type_cmd);
- return NULL;
+ return 1; /* One context descriptor needed */
}
/* TX function */
@@ -2701,231 +2599,17 @@ uint16_t
iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
struct ci_tx_queue *txq = tx_queue;
- volatile struct ci_tx_desc *txr = txq->ci_tx_ring;
- struct ci_tx_entry *txe_ring = txq->sw_ring;
- struct ci_tx_entry *txe, *txn;
- struct rte_mbuf *mb, *mb_seg;
- uint64_t buf_dma_addr;
- uint16_t desc_idx, desc_idx_last;
- uint16_t idx;
- uint16_t slen;
-
-
- /* Check if the descriptor ring needs to be cleaned. */
- if (txq->nb_tx_free < txq->tx_free_thresh)
- ci_tx_xmit_cleanup(txq);
-
- desc_idx = txq->tx_tail;
- txe = &txe_ring[desc_idx];
-
- for (idx = 0; idx < nb_pkts; idx++) {
- volatile struct ci_tx_desc *ddesc;
- struct iavf_ipsec_crypto_pkt_metadata *ipsec_md;
-
- uint16_t nb_desc_ctx, nb_desc_ipsec;
- uint16_t nb_desc_data, nb_desc_required;
- uint16_t tlen = 0, ipseclen = 0;
- uint64_t ddesc_template = 0;
- uint64_t ddesc_cmd = 0;
-
- mb = tx_pkts[idx];
- RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
-
- /**
- * Get metadata for ipsec crypto from mbuf dynamic fields if
- * security offload is specified.
- */
- ipsec_md = iavf_ipsec_crypto_get_pkt_metadata(txq, mb);
-
- nb_desc_data = mb->nb_segs;
- nb_desc_ctx =
- iavf_calc_context_desc(mb, txq->vlan_flag);
- nb_desc_ipsec = !!(mb->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD);
-
- /**
- * The number of descriptors that must be allocated for
- * a packet equals to the number of the segments of that
- * packet plus the context and ipsec descriptors if needed.
- * Recalculate the needed tx descs when TSO enabled in case
- * the mbuf data size exceeds max data size that hw allows
- * per tx desc.
- */
- if (mb->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
- nb_desc_required = ci_calc_pkt_desc(mb) + nb_desc_ctx + nb_desc_ipsec;
- else
- nb_desc_required = nb_desc_data + nb_desc_ctx + nb_desc_ipsec;
-
- desc_idx_last = (uint16_t)(desc_idx + nb_desc_required - 1);
-
- /* wrap descriptor ring */
- if (desc_idx_last >= txq->nb_tx_desc)
- desc_idx_last =
- (uint16_t)(desc_idx_last - txq->nb_tx_desc);
-
- PMD_TX_LOG(DEBUG,
- "port_id=%u queue_id=%u tx_first=%u tx_last=%u",
- txq->port_id, txq->queue_id, desc_idx, desc_idx_last);
-
- if (nb_desc_required > txq->nb_tx_free) {
- if (ci_tx_xmit_cleanup(txq)) {
- if (idx == 0)
- return 0;
- goto end_of_tx;
- }
- if (unlikely(nb_desc_required > txq->tx_rs_thresh)) {
- while (nb_desc_required > txq->nb_tx_free) {
- if (ci_tx_xmit_cleanup(txq)) {
- if (idx == 0)
- return 0;
- goto end_of_tx;
- }
- }
- }
- }
-
- iavf_build_data_desc_cmd_offset_fields(&ddesc_template, mb,
- txq->vlan_flag);
-
- /* Setup TX context descriptor if required */
- if (nb_desc_ctx) {
- volatile struct iavf_tx_context_desc *ctx_desc =
- (volatile struct iavf_tx_context_desc *)
- &txr[desc_idx];
-
- /* clear QW0 or the previous writeback value
- * may impact next write
- */
- *(volatile uint64_t *)ctx_desc = 0;
-
- txn = &txe_ring[txe->next_id];
- RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
-
- if (txe->mbuf) {
- rte_pktmbuf_free_seg(txe->mbuf);
- txe->mbuf = NULL;
- }
-
- iavf_fill_context_desc(ctx_desc, mb, ipsec_md, &tlen,
- txq->vlan_flag);
- IAVF_DUMP_TX_DESC(txq, ctx_desc, desc_idx);
-
- txe->last_id = desc_idx_last;
- desc_idx = txe->next_id;
- txe = txn;
- }
-
- if (nb_desc_ipsec) {
- volatile struct iavf_tx_ipsec_desc *ipsec_desc =
- (volatile struct iavf_tx_ipsec_desc *)
- &txr[desc_idx];
-
- txn = &txe_ring[txe->next_id];
- RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
-
- if (txe->mbuf) {
- rte_pktmbuf_free_seg(txe->mbuf);
- txe->mbuf = NULL;
- }
-
- iavf_fill_ipsec_desc(ipsec_desc, ipsec_md, &ipseclen);
-
- IAVF_DUMP_TX_DESC(txq, ipsec_desc, desc_idx);
-
- txe->last_id = desc_idx_last;
- desc_idx = txe->next_id;
- txe = txn;
- }
-
- mb_seg = mb;
-
- do {
- ddesc = (volatile struct ci_tx_desc *)
- &txr[desc_idx];
-
- txn = &txe_ring[txe->next_id];
- RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
-
- if (txe->mbuf)
- rte_pktmbuf_free_seg(txe->mbuf);
-
- txe->mbuf = mb_seg;
-
- if ((mb_seg->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) &&
- (mb_seg->ol_flags &
- (RTE_MBUF_F_TX_TCP_SEG |
- RTE_MBUF_F_TX_UDP_SEG))) {
- slen = tlen + mb_seg->l2_len + mb_seg->l3_len +
- mb_seg->outer_l3_len + ipseclen;
- if (mb_seg->ol_flags & RTE_MBUF_F_TX_L4_MASK)
- slen += mb_seg->l4_len;
- } else {
- slen = mb_seg->data_len;
- }
-
- buf_dma_addr = rte_mbuf_data_iova(mb_seg);
- while ((mb_seg->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
- RTE_MBUF_F_TX_UDP_SEG)) &&
- unlikely(slen > CI_MAX_DATA_PER_TXD)) {
- iavf_fill_data_desc(ddesc, ddesc_template,
- CI_MAX_DATA_PER_TXD, buf_dma_addr);
-
- IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx);
-
- buf_dma_addr += CI_MAX_DATA_PER_TXD;
- slen -= CI_MAX_DATA_PER_TXD;
-
- txe->last_id = desc_idx_last;
- desc_idx = txe->next_id;
- txe = txn;
- ddesc = &txr[desc_idx];
- txn = &txe_ring[txe->next_id];
- }
-
- iavf_fill_data_desc(ddesc, ddesc_template,
- slen, buf_dma_addr);
-
- IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx);
-
- txe->last_id = desc_idx_last;
- desc_idx = txe->next_id;
- txe = txn;
- mb_seg = mb_seg->next;
- } while (mb_seg);
-
- /* The last packet data descriptor needs End Of Packet (EOP) */
- ddesc_cmd = CI_TX_DESC_CMD_EOP;
-
- txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_desc_required);
- txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_desc_required);
-
- if (txq->nb_tx_used >= txq->tx_rs_thresh) {
- PMD_TX_LOG(DEBUG, "Setting RS bit on TXD id="
- "%4u (port=%d queue=%d)",
- desc_idx_last, txq->port_id, txq->queue_id);
-
- ddesc_cmd |= CI_TX_DESC_CMD_RS;
-
- /* Update txq RS bit counters */
- txq->nb_tx_used = 0;
- }
-
- ddesc->cmd_type_offset_bsz |= rte_cpu_to_le_64(ddesc_cmd <<
- IAVF_TXD_DATA_QW1_CMD_SHIFT);
-
- IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx - 1);
- }
-
-end_of_tx:
- rte_wmb();
-
- PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
- txq->port_id, txq->queue_id, desc_idx, idx);
-
- IAVF_PCI_REG_WRITE_RELAXED(txq->qtx_tail, desc_idx);
- txq->tx_tail = desc_idx;
+ const struct ci_ipsec_ops ipsec_ops = {
+ .get_ipsec_desc = iavf_get_ipsec_desc,
+ .calc_segment_len = iavf_calc_ipsec_segment_len,
+ };
- return idx;
+ /* IAVF does not support timestamp queues, so pass NULL for ts_fns */
+ return ci_xmit_pkts(txq, tx_pkts, nb_pkts,
+ (txq->vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1) ?
+ CI_VLAN_IN_L2TAG1 : CI_VLAN_IN_L2TAG2,
+ iavf_get_context_desc, &ipsec_ops, NULL);
}
/* Check if the packet with vlan user priority is transmitted in the
--
2.51.0
* [RFC PATCH 17/27] net/i40e: document requirement for QinQ support
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (15 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 16/27] net/iavf: use common scalar Tx function Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 18/27] net/idpf: use common scalar Tx function Bruce Richardson
` (9 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson
To get multiple VLAN tags inserted in an outgoing packet with QinQ
offload, the i40e driver needs to be set to double VLAN mode. This is
done using the VLAN_EXTEND Rx config flag. Add a code check for this
dependency and update the docs to describe it.
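For reference, the application-side setup is roughly as follows
(port_id and the queue counts are assumed to come from the application;
this mirrors the documentation example added in this patch, extended to
the actual configure call):

	struct rte_eth_conf conf = {
		.rxmode = {
			.offloads = RTE_ETH_RX_OFFLOAD_VLAN_EXTEND,
		},
		.txmode = {
			.offloads = RTE_ETH_TX_OFFLOAD_QINQ_INSERT,
		},
	};

	if (rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf) != 0)
		rte_exit(EXIT_FAILURE, "cannot configure port %u\n", port_id);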
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
doc/guides/nics/i40e.rst | 18 ++++++++++++++++++
drivers/net/intel/i40e/i40e_rxtx.c | 9 +++++++++
2 files changed, 27 insertions(+)
diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
index 45dc083c94..cbfaddbdd8 100644
--- a/doc/guides/nics/i40e.rst
+++ b/doc/guides/nics/i40e.rst
@@ -245,6 +245,24 @@ Runtime Configuration
* ``segment``: Check number of mbuf segments not exceed hw limitation.
* ``offload``: Check any unsupported offload flag.
+QinQ Configuration
+~~~~~~~~~~~~~~~~~~
+
+When using QinQ TX offload (``RTE_ETH_TX_OFFLOAD_QINQ_INSERT``), you must also
+enable ``RTE_ETH_RX_OFFLOAD_VLAN_EXTEND`` to configure the hardware for double
+VLAN mode. Without this, only the inner VLAN tag will be inserted.
+
+Example::
+
+ struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .offloads = RTE_ETH_RX_OFFLOAD_VLAN_EXTEND,
+ },
+ .txmode = {
+ .offloads = RTE_ETH_TX_OFFLOAD_QINQ_INSERT,
+ },
+ };
+
Vector RX Pre-conditions
~~~~~~~~~~~~~~~~~~~~~~~~
For Vector RX it is assumed that the number of descriptor rings will be a power
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index 2d12e6dd1a..aef78c5358 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -2171,6 +2171,15 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx);
if (!vsi)
return -EINVAL;
+
+ /* Check if QinQ TX offload requires VLAN extend mode */
+ if ((offloads & RTE_ETH_TX_OFFLOAD_QINQ_INSERT) &&
+ !(dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_VLAN_EXTEND)) {
+ PMD_DRV_LOG(WARNING, "Port %u: QinQ TX offload is enabled but VLAN extend mode is not set. ",
+ dev->data->port_id);
+ PMD_DRV_LOG(WARNING, "Double VLAN insertion may not work correctly without RTE_ETH_RX_OFFLOAD_VLAN_EXTEND set in Rx configuration.");
+ }
+
q_offset = i40e_get_queue_offset_by_qindex(pf, queue_idx);
if (q_offset < 0)
return -EINVAL;
--
2.51.0
* [RFC PATCH 18/27] net/idpf: use common scalar Tx function
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (16 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 17/27] net/i40e: document requirement for QinQ support Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 19/27] net/intel: avoid writing the final pkt descriptor twice Bruce Richardson
` (8 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson, Jingjing Wu, Praveen Shetty
Update the idpf driver to use the common scalar Tx function in the
single-queue configuration.
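As background, the shape of the per-driver context-descriptor hook that
ci_xmit_pkts() takes can be read off the new idpf_set_tso_ctx() prototype in
the diff below; the typedef name here is only illustrative, not taken from
the series:

    /* Fill qw0/qw1 of a context descriptor if the packet needs one.
     * Returns 0 if no context descriptor is needed, 1 if one was set.
     * txq and td_offset exist for drivers that need them; idpf marks
     * them __rte_unused.
     */
    typedef uint16_t (*ci_tx_ctx_desc_fn)(uint64_t ol_flags,
            const struct rte_mbuf *mbuf,
            const union ci_tx_offload *tx_offload,
            const struct ci_tx_queue *txq,
            uint32_t *td_offset,
            uint64_t *qw0, uint64_t *qw1);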
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/idpf/idpf_common_rxtx.c | 179 ++--------------------
1 file changed, 11 insertions(+), 168 deletions(-)
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.c b/drivers/net/intel/idpf/idpf_common_rxtx.c
index b34d545a0a..81bc45f6ef 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.c
@@ -8,7 +8,6 @@
#include "idpf_common_rxtx.h"
#include "idpf_common_device.h"
-#include "../common/rx.h"
int idpf_timestamp_dynfield_offset = -1;
uint64_t idpf_timestamp_dynflag;
@@ -848,9 +847,11 @@ idpf_calc_context_desc(uint64_t flags)
/* set TSO context descriptor, returns 0 if no context needed, 1 if context set
*/
static inline uint16_t
-idpf_set_tso_ctx(uint64_t ol_flags, struct rte_mbuf *mbuf,
- union ci_tx_offload tx_offload,
- uint64_t *qw0, uint64_t *qw1)
+idpf_set_tso_ctx(uint64_t ol_flags, const struct rte_mbuf *mbuf,
+ const union ci_tx_offload *tx_offload,
+ const struct ci_tx_queue *txq __rte_unused,
+ uint32_t *td_offset __rte_unused,
+ uint64_t *qw0, uint64_t *qw1)
{
uint16_t cmd_dtype = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX | IDPF_TX_FLEX_CTX_DESC_CMD_TSO;
uint16_t tso_segsz = mbuf->tso_segsz;
@@ -861,12 +862,12 @@ idpf_set_tso_ctx(uint64_t ol_flags, struct rte_mbuf *mbuf,
return 0;
/* TSO context descriptor setup */
- if (tx_offload.l4_len == 0) {
+ if (tx_offload->l4_len == 0) {
TX_LOG(DEBUG, "L4 length set to 0");
return 0;
}
- hdr_len = tx_offload.l2_len + tx_offload.l3_len + tx_offload.l4_len;
+ hdr_len = tx_offload->l2_len + tx_offload->l3_len + tx_offload->l4_len;
tso_len = mbuf->pkt_len - hdr_len;
*qw0 = rte_cpu_to_le_32(tso_len & IDPF_TXD_FLEX_CTX_MSS_RT_M) |
@@ -933,7 +934,8 @@ idpf_dp_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_offload.tso_segsz = tx_pkt->tso_segsz;
/* Calculate the number of context descriptors needed. */
uint64_t cd_qw0, cd_qw1;
- nb_ctx = idpf_set_tso_ctx(ol_flags, tx_pkt, tx_offload, &cd_qw0, &cd_qw1);
+ nb_ctx = idpf_set_tso_ctx(ol_flags, tx_pkt, &tx_offload, txq,
+ NULL /* unused */, &cd_qw0, &cd_qw1);
/* Calculate the number of TX descriptors needed for
* each packet. For TSO packets, use ci_calc_pkt_desc as
@@ -1339,167 +1341,8 @@ uint16_t
idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
{
- volatile struct ci_tx_desc *txd;
- volatile struct ci_tx_desc *txr;
- union ci_tx_offload tx_offload = {0};
- struct ci_tx_entry *txe, *txn;
- struct ci_tx_entry *sw_ring;
- struct ci_tx_queue *txq;
- struct rte_mbuf *tx_pkt;
- struct rte_mbuf *m_seg;
- uint64_t buf_dma_addr;
- uint32_t td_offset;
- uint64_t ol_flags;
- uint16_t tx_last;
- uint16_t nb_used;
- uint16_t nb_ctx;
- uint16_t td_cmd;
- uint16_t tx_id;
- uint16_t nb_tx;
- uint16_t slen;
-
- nb_tx = 0;
- txq = tx_queue;
-
- if (unlikely(txq == NULL))
- return nb_tx;
-
- sw_ring = txq->sw_ring;
- txr = txq->ci_tx_ring;
- tx_id = txq->tx_tail;
- txe = &sw_ring[tx_id];
-
- /* Check if the descriptor ring needs to be cleaned. */
- if (txq->nb_tx_free < txq->tx_free_thresh)
- (void)ci_tx_xmit_cleanup(txq);
-
- for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
- td_cmd = 0;
- td_offset = 0;
-
- tx_pkt = *tx_pkts++;
- RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
-
- ol_flags = tx_pkt->ol_flags;
- tx_offload.l2_len = tx_pkt->l2_len;
- tx_offload.l3_len = tx_pkt->l3_len;
- tx_offload.l4_len = tx_pkt->l4_len;
- tx_offload.tso_segsz = tx_pkt->tso_segsz;
- /* Calculate the number of context descriptors needed. */
- uint64_t cd_qw0, cd_qw1;
- nb_ctx = idpf_set_tso_ctx(ol_flags, tx_pkt, tx_offload, &cd_qw0, &cd_qw1);
-
- /* The number of descriptors that must be allocated for
- * a packet. For TSO packets, use ci_calc_pkt_desc as
- * the mbuf data size might exceed max data size that hw allows
- * per tx desc.
- */
- if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
- nb_used = (uint16_t)(ci_calc_pkt_desc(tx_pkt) + nb_ctx);
- else
- nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
- tx_last = (uint16_t)(tx_id + nb_used - 1);
-
- /* Circular ring */
- if (tx_last >= txq->nb_tx_desc)
- tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
-
- TX_LOG(DEBUG, "port_id=%u queue_id=%u"
- " tx_first=%u tx_last=%u",
- txq->port_id, txq->queue_id, tx_id, tx_last);
-
- if (nb_used > txq->nb_tx_free) {
- if (ci_tx_xmit_cleanup(txq) != 0) {
- if (nb_tx == 0)
- return 0;
- goto end_of_tx;
- }
- if (unlikely(nb_used > txq->tx_rs_thresh)) {
- while (nb_used > txq->nb_tx_free) {
- if (ci_tx_xmit_cleanup(txq) != 0) {
- if (nb_tx == 0)
- return 0;
- goto end_of_tx;
- }
- }
- }
- }
-
- if (ol_flags & CI_TX_CKSUM_OFFLOAD_MASK)
- td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
-
- if (nb_ctx != 0) {
- /* Setup TX context descriptor if required */
- uint64_t *ctx_txd = RTE_CAST_PTR(uint64_t *, &txr[tx_id]);
-
- txn = &sw_ring[txe->next_id];
- RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
- if (txe->mbuf != NULL) {
- rte_pktmbuf_free_seg(txe->mbuf);
- txe->mbuf = NULL;
- }
-
- ctx_txd[0] = cd_qw0;
- ctx_txd[1] = cd_qw1;
-
- txe->last_id = tx_last;
- tx_id = txe->next_id;
- txe = txn;
- }
-
- m_seg = tx_pkt;
- do {
- txd = &txr[tx_id];
- txn = &sw_ring[txe->next_id];
-
- if (txe->mbuf != NULL)
- rte_pktmbuf_free_seg(txe->mbuf);
- txe->mbuf = m_seg;
-
- /* Setup TX Descriptor */
- slen = m_seg->data_len;
- buf_dma_addr = rte_mbuf_data_iova(m_seg);
- txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
- txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
- ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
- ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
- ((uint64_t)slen << CI_TXD_QW1_TX_BUF_SZ_S));
-
- txe->last_id = tx_last;
- tx_id = txe->next_id;
- txe = txn;
- m_seg = m_seg->next;
- } while (m_seg);
-
- /* The last packet data descriptor needs End Of Packet (EOP) */
- td_cmd |= CI_TX_DESC_CMD_EOP;
- txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
- txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
-
- if (txq->nb_tx_used >= txq->tx_rs_thresh) {
- TX_LOG(DEBUG, "Setting RS bit on TXD id="
- "%4u (port=%d queue=%d)",
- tx_last, txq->port_id, txq->queue_id);
-
- td_cmd |= CI_TX_DESC_CMD_RS;
-
- /* Update txq RS bit counters */
- txq->nb_tx_used = 0;
- }
-
- txd->cmd_type_offset_bsz |= rte_cpu_to_le_16(td_cmd << CI_TXD_QW1_CMD_S);
- }
-
-end_of_tx:
- rte_wmb();
-
- TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
- txq->port_id, txq->queue_id, tx_id, nb_tx);
-
- IDPF_PCI_REG_WRITE(txq->qtx_tail, tx_id);
- txq->tx_tail = tx_id;
-
- return nb_tx;
+ return ci_xmit_pkts(tx_queue, tx_pkts, nb_pkts, CI_VLAN_IN_L2TAG1,
+ idpf_set_tso_ctx, NULL, NULL);
}
/* TX prep functions */
--
2.51.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [RFC PATCH 19/27] net/intel: avoid writing the final pkt descriptor twice
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (17 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 18/27] net/idpf: use common scalar Tx function Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 20/27] net/intel: write descriptors using non-volatile pointers Bruce Richardson
` (7 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson
In the scalar datapath, there is a loop to handle multi-segment and
multi-descriptor packets on Tx. After that loop, the end-of-packet bit
was written to the descriptor separately, meaning that for each
single-descriptor packet there were two writes to the second quad-word -
basically 3 x 64-bit writes rather than just 2. Adjusting the code to
compute the EOP bit inside the loop saves that extra write per packet
and so improves performance.
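For clarity, a sketch of the store pattern being changed (illustrative, not
the exact code):

    /*
     * Stores per single-descriptor packet, before vs. after this patch:
     *
     *   before:  txd->buffer_addr         = qw0;           64-bit store #1
     *            txd->cmd_type_offset_bsz = qw1;           64-bit store #2
     *            ...loop ends...
     *            txd->cmd_type_offset_bsz |= EOP/RS bits;   64-bit store #3
     *
     *   after:   td_cmd |= CI_TX_DESC_CMD_EOP;    (set inside the loop)
     *            txd->buffer_addr         = qw0;           64-bit store #1
     *            txd->cmd_type_offset_bsz = qw1 incl. EOP;  64-bit store #2
     */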
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx_scalar_fns.h | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
index 6079a558e4..7b643fcf44 100644
--- a/drivers/net/intel/common/tx_scalar_fns.h
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -378,6 +378,10 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
txn = &sw_ring[txe->next_id];
}
+ /* fill the last descriptor with End of Packet (EOP) bit */
+ if (m_seg->next == NULL)
+ td_cmd |= CI_TX_DESC_CMD_EOP;
+
txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
@@ -390,21 +394,17 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
txe = txn;
m_seg = m_seg->next;
} while (m_seg);
-
- /* fill the last descriptor with End of Packet (EOP) bit */
- td_cmd |= CI_TX_DESC_CMD_EOP;
txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
/* set RS bit on the last descriptor of one packet */
if (txq->nb_tx_used >= txq->tx_rs_thresh) {
- td_cmd |= CI_TX_DESC_CMD_RS;
+ txd->cmd_type_offset_bsz |=
+ rte_cpu_to_le_64(CI_TX_DESC_CMD_RS << CI_TXD_QW1_CMD_S);
/* Update txq RS bit counters */
txq->nb_tx_used = 0;
}
- txd->cmd_type_offset_bsz |=
- rte_cpu_to_le_64(((uint64_t)td_cmd) << CI_TXD_QW1_CMD_S);
if (ts_fns != NULL)
ts_id = ts_fns->write_ts_desc(txq, tx_pkt, tx_id, ts_id);
--
2.51.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [RFC PATCH 20/27] net/intel: write descriptors using non-volatile pointers
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (18 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 19/27] net/intel: avoid writing the final pkt descriptor twice Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-20 8:43 ` Morten Brørup
2025-12-19 17:25 ` [RFC PATCH 21/27] net/intel: remove unnecessary flag clearing Bruce Richardson
` (6 subsequent siblings)
26 siblings, 1 reply; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson
Use a non-volatile uint64_t pointer to store to the descriptor ring.
This will allow the compiler to merge the stores as it sees fit.
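Roughly, the effect being enabled is the following; whether the merge
actually happens is up to the compiler and target:

    /*
     * With a volatile destination the compiler must emit two separate,
     * in-order 8-byte stores for each descriptor:
     *
     *     store qw0 -> txd[0]
     *     store qw1 -> txd[8]
     *
     * With a plain uint64_t pointer it is free to combine them, for
     * example into a single 16-byte store (movups on x86-64, stp on
     * arm64), or to schedule them differently within the burst.
     */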
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx_scalar_fns.h | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
index 7b643fcf44..95e9acbe60 100644
--- a/drivers/net/intel/common/tx_scalar_fns.h
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -184,6 +184,15 @@ struct ci_timesstamp_queue_fns {
write_ts_tail_t write_ts_tail;
};
+static inline void
+write_txd(volatile void *txd, uint64_t qw0, uint64_t qw1)
+{
+ uint64_t *txd_qw = RTE_CAST_PTR(void *, txd);
+
+ txd_qw[0] = rte_cpu_to_le_64(qw0);
+ txd_qw[1] = rte_cpu_to_le_64(qw1);
+}
+
static inline uint16_t
ci_xmit_pkts(struct ci_tx_queue *txq,
struct rte_mbuf **tx_pkts,
@@ -313,8 +322,7 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
txe->mbuf = NULL;
}
- ctx_txd[0] = cd_qw0;
- ctx_txd[1] = cd_qw1;
+ write_txd(ctx_txd, cd_qw0, cd_qw1);
txe->last_id = tx_last;
tx_id = txe->next_id;
@@ -361,12 +369,12 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
while ((ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) &&
unlikely(slen > CI_MAX_DATA_PER_TXD)) {
- txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
- txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
+ const uint64_t cmd_type_offset_bsz = CI_TX_DESC_DTYPE_DATA |
((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
((uint64_t)CI_MAX_DATA_PER_TXD << CI_TXD_QW1_TX_BUF_SZ_S) |
- ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
+ ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S);
+ write_txd(txd, buf_dma_addr, cmd_type_offset_bsz);
buf_dma_addr += CI_MAX_DATA_PER_TXD;
slen -= CI_MAX_DATA_PER_TXD;
@@ -382,12 +390,12 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
if (m_seg->next == NULL)
td_cmd |= CI_TX_DESC_CMD_EOP;
- txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
- txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
+ const uint64_t cmd_type_offset_bsz = CI_TX_DESC_DTYPE_DATA |
((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
((uint64_t)slen << CI_TXD_QW1_TX_BUF_SZ_S) |
- ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
+ ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S);
+ write_txd(txd, buf_dma_addr, cmd_type_offset_bsz);
txe->last_id = tx_last;
tx_id = txe->next_id;
--
2.51.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* RE: [RFC PATCH 20/27] net/intel: write descriptors using non-volatile pointers
2025-12-19 17:25 ` [RFC PATCH 20/27] net/intel: write descriptors using non-volatile pointers Bruce Richardson
@ 2025-12-20 8:43 ` Morten Brørup
0 siblings, 0 replies; 30+ messages in thread
From: Morten Brørup @ 2025-12-20 8:43 UTC (permalink / raw)
To: Bruce Richardson, dev
> From: Bruce Richardson [mailto:bruce.richardson@intel.com]
> Sent: Friday, 19 December 2025 18.26
>
> Use a non-volatile uint64_t pointer to store to the descriptor ring.
> This will allow the compiler to optionally merge the stores as it sees
> best.
I suppose there was a reason for the volatile.
Is removing it really safe?
E.g. this will also allow the compiler to reorder stores; not just the pair of 64-bits, but also stores to multiple descriptors.
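For context, the ordering the hardware actually depends on looks roughly
like this (write_txd from this patch; the barrier and doorbell as in the
per-driver code being replaced), assuming the common path keeps that
pre-doorbell barrier:

    /*
     *   write_txd(...);                  plain stores; the compiler may
     *   write_txd(...);                  merge or reorder these among
     *   ...                              themselves
     *   rte_wmb();                       orders all descriptor stores ...
     *   PCI_REG_WRITE(qtx_tail, tx_id);  ... before the doorbell write
     *
     * Reordering among the descriptor stores only matters if one of them
     * could become visible to the NIC after the doorbell; it is the
     * pre-doorbell barrier, rather than the volatile qualifier, that
     * provides that guarantee.
     */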
One more comment inline below.
>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
> drivers/net/intel/common/tx_scalar_fns.h | 24 ++++++++++++++++--------
> 1 file changed, 16 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/intel/common/tx_scalar_fns.h
> b/drivers/net/intel/common/tx_scalar_fns.h
> index 7b643fcf44..95e9acbe60 100644
> --- a/drivers/net/intel/common/tx_scalar_fns.h
> +++ b/drivers/net/intel/common/tx_scalar_fns.h
> @@ -184,6 +184,15 @@ struct ci_timesstamp_queue_fns {
> write_ts_tail_t write_ts_tail;
> };
>
> +static inline void
> +write_txd(volatile void *txd, uint64_t qw0, uint64_t qw1)
> +{
> + uint64_t *txd_qw = RTE_CAST_PTR(void *, txd);
If the descriptors are 16-byte aligned, you could mark them as such, so the compiler can use 128-bit stores on architectures where alignment matters.
> +
> + txd_qw[0] = rte_cpu_to_le_64(qw0);
> + txd_qw[1] = rte_cpu_to_le_64(qw1);
> +}
> +
> static inline uint16_t
> ci_xmit_pkts(struct ci_tx_queue *txq,
> struct rte_mbuf **tx_pkts,
> @@ -313,8 +322,7 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
> txe->mbuf = NULL;
> }
>
> - ctx_txd[0] = cd_qw0;
> - ctx_txd[1] = cd_qw1;
> + write_txd(ctx_txd, cd_qw0, cd_qw1);
>
> txe->last_id = tx_last;
> tx_id = txe->next_id;
> @@ -361,12 +369,12 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
>
> while ((ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
> RTE_MBUF_F_TX_UDP_SEG)) &&
> unlikely(slen > CI_MAX_DATA_PER_TXD)) {
> - txd->buffer_addr =
> rte_cpu_to_le_64(buf_dma_addr);
> - txd->cmd_type_offset_bsz =
> rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
> + const uint64_t cmd_type_offset_bsz =
> CI_TX_DESC_DTYPE_DATA |
> ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
> ((uint64_t)td_offset <<
> CI_TXD_QW1_OFFSET_S) |
> ((uint64_t)CI_MAX_DATA_PER_TXD <<
> CI_TXD_QW1_TX_BUF_SZ_S) |
> - ((uint64_t)td_tag <<
> CI_TXD_QW1_L2TAG1_S));
> + ((uint64_t)td_tag <<
> CI_TXD_QW1_L2TAG1_S);
> + write_txd(txd, buf_dma_addr,
> cmd_type_offset_bsz);
>
> buf_dma_addr += CI_MAX_DATA_PER_TXD;
> slen -= CI_MAX_DATA_PER_TXD;
> @@ -382,12 +390,12 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
> if (m_seg->next == NULL)
> td_cmd |= CI_TX_DESC_CMD_EOP;
>
> - txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
> - txd->cmd_type_offset_bsz =
> rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
> + const uint64_t cmd_type_offset_bsz =
> CI_TX_DESC_DTYPE_DATA |
> ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
> ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
> ((uint64_t)slen << CI_TXD_QW1_TX_BUF_SZ_S) |
> - ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
> + ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S);
> + write_txd(txd, buf_dma_addr, cmd_type_offset_bsz);
>
> txe->last_id = tx_last;
> tx_id = txe->next_id;
> --
> 2.51.0
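On the inline alignment comment above, a minimal sketch of one way to
express it; the struct name is invented here, and it assumes the ring
entries really are 16-byte aligned:

    #include <stdint.h>
    #include <rte_common.h>
    #include <rte_byteorder.h>

    struct ci_txd_qwords {
        uint64_t qw0;
        uint64_t qw1;
    } __rte_aligned(16);

    static inline void
    write_txd_aligned(volatile void *txd, uint64_t qw0, uint64_t qw1)
    {
        struct ci_txd_qwords *d = RTE_CAST_PTR(struct ci_txd_qwords *, txd);

        /* with the alignment visible to the compiler, these two stores
         * may be combined into a single 128-bit store
         */
        d->qw0 = rte_cpu_to_le_64(qw0);
        d->qw1 = rte_cpu_to_le_64(qw1);
    }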
^ permalink raw reply [flat|nested] 30+ messages in thread
* [RFC PATCH 21/27] net/intel: remove unnecessary flag clearing
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (19 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 20/27] net/intel: write descriptors using non-volatile pointers Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 22/27] net/intel: mark mid-burst ring cleanup as unlikely Bruce Richardson
` (5 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson
When cleaning the Tx ring, there is no need to zero out the done flag
from the completed entry. That flag will be automatically cleared when
the descriptor is next written. This gives a small performance benefit.
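To spell out the reasoning, the lifecycle of the descriptor slot that
carries the RS bit is roughly:

    /*
     *  1. SW writes the data descriptor: DTYPE = DATA, RS bit set
     *     (this overwrites whatever "done" value was left in the slot)
     *  2. HW completes the packet and writes back DTYPE = DESC_DONE (0xF)
     *  3. SW cleanup sees DESC_DONE and credits the freed slots
     *  4. The slot is eventually reused, going back to step 1
     *
     * The old "cmd_type_offset_bsz = 0" in step 3 only anticipated what
     * step 1 does anyway the next time the slot is written, so dropping
     * it removes a store without changing behaviour.
     */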
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx_scalar_fns.h | 7 -------
1 file changed, 7 deletions(-)
diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
index 95e9acbe60..cb45029bd7 100644
--- a/drivers/net/intel/common/tx_scalar_fns.h
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -51,13 +51,6 @@ ci_tx_xmit_cleanup(struct ci_tx_queue *txq)
else
nb_tx_to_clean = (uint16_t)(desc_to_clean_to - last_desc_cleaned);
- /* The last descriptor to clean is done, so that means all the
- * descriptors from the last descriptor that was cleaned
- * up to the last descriptor with the RS bit set
- * are done. Only reset the threshold descriptor.
- */
- txd[desc_to_clean_to].cmd_type_offset_bsz = 0;
-
/* Update the txq to reflect the last descriptor that was cleaned */
txq->last_desc_cleaned = desc_to_clean_to;
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
--
2.51.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [RFC PATCH 22/27] net/intel: mark mid-burst ring cleanup as unlikely
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (20 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 21/27] net/intel: remove unnecessary flag clearing Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 23/27] net/intel: add special handling for single desc packets Bruce Richardson
` (4 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson
It should rarely be the case that we need to clean up the descriptor ring
mid-burst, so mark that branch as unlikely to help performance.
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx_scalar_fns.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
index cb45029bd7..27791cf138 100644
--- a/drivers/net/intel/common/tx_scalar_fns.h
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -272,7 +272,7 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
if (tx_last >= txq->nb_tx_desc)
tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
- if (nb_used > txq->nb_tx_free) {
+ if (unlikely(nb_used > txq->nb_tx_free)) {
if (ci_tx_xmit_cleanup(txq) != 0) {
if (nb_tx == 0)
return 0;
--
2.51.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [RFC PATCH 23/27] net/intel: add special handling for single desc packets
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (21 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 22/27] net/intel: mark mid-burst ring cleanup as unlikely Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 24/27] net/intel: use separate array for desc status tracking Bruce Richardson
` (3 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson
Within the scalar Tx path, add a shortcut for packets that don't
use TSO and need only a single data descriptor.
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx_scalar_fns.h | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
index 27791cf138..55502b46ed 100644
--- a/drivers/net/intel/common/tx_scalar_fns.h
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -304,6 +304,28 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
ci_txd_enable_checksum(ol_flags, &td_cmd,
&td_offset, tx_offload);
+ /* special case for single descriptor packet, without TSO offload */
+ if (nb_used == 1 && (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) == 0) {
+ txd = &ci_tx_ring[tx_id];
+ tx_id = txe->next_id;
+
+ if (txe->mbuf)
+ rte_pktmbuf_free_seg(txe->mbuf);
+ *txe = (struct ci_tx_entry){ .mbuf = tx_pkt, .last_id = tx_last, .next_id = tx_id };
+
+ /* Setup TX Descriptor */
+ td_cmd |= CI_TX_DESC_CMD_EOP;
+ const uint64_t cmd_type_offset_bsz = CI_TX_DESC_DTYPE_DATA |
+ ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
+ ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
+ ((uint64_t)tx_pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S) |
+ ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S);
+ write_txd(txd, rte_mbuf_data_iova(tx_pkt), cmd_type_offset_bsz);
+
+ txe = &sw_ring[tx_id];
+ goto end_pkt;
+ }
+
if (nb_ctx) {
/* Setup TX context descriptor if required */
uint64_t *ctx_txd = RTE_CAST_PTR(uint64_t *, &ci_tx_ring[tx_id]);
@@ -395,6 +417,7 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
txe = txn;
m_seg = m_seg->next;
} while (m_seg);
+end_pkt:
txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
--
2.51.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [RFC PATCH 24/27] net/intel: use separate array for desc status tracking
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (22 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 23/27] net/intel: add special handling for single desc packets Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 25/27] net/ixgbe: " Bruce Richardson
` (2 subsequent siblings)
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev
Cc: Bruce Richardson, Vladimir Medvedkin, Anatoly Burakov,
Jingjing Wu, Praveen Shetty
Rather than writing a last_id for each individual descriptor, we can
write one only for the places where the "report status" (RS) bit is set,
i.e. the descriptors which will be written back when done. The method
used for marking which descriptors are free is also changed in the
process: even if the last descriptor with the "done" bits set is past
the expected point, we only track up to the expected point and leave
the rest to be counted as freed next time. This means that we always
have the RS/DD bits set at fixed intervals, and we always track free
slots in units of the same tx_rs_thresh intervals.
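A worked example of the new cleanup arithmetic, using illustrative numbers
(tx_rs_thresh = 32, so log2_rs_thresh = 5):

    /*
     *   last_desc_cleaned = 95            buckets 0..2 already cleaned
     *   rs_idx            = (95 + 1) >> 5        = 3
     *   desc_to_clean_to  = (3 << 5) + (32 - 1)  = 127
     *
     * Cleanup checks the DD bits of descriptor rs_last_id[3], i.e. the
     * last descriptor of the packet that crossed out of bucket 3 (which
     * may lie beyond index 127 if that packet straddles the boundary).
     * If it is done, exactly tx_rs_thresh = 32 slots are credited back
     * and last_desc_cleaned becomes 127.
     */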
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx.h | 4 ++
drivers/net/intel/common/tx_scalar_fns.h | 59 +++++++++++------------
drivers/net/intel/i40e/i40e_rxtx.c | 20 ++++++++
drivers/net/intel/iavf/iavf_rxtx.c | 19 ++++++++
drivers/net/intel/ice/ice_rxtx.c | 20 ++++++++
drivers/net/intel/idpf/idpf_common_rxtx.c | 7 +++
drivers/net/intel/idpf/idpf_rxtx.c | 13 +++++
7 files changed, 110 insertions(+), 32 deletions(-)
diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index 0d11daaab3..9b3f8385e6 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -126,6 +126,8 @@ struct ci_tx_queue {
struct ci_tx_entry *sw_ring; /* virtual address of SW ring */
struct ci_tx_entry_vec *sw_ring_vec;
};
+ /* Scalar TX path: Array tracking last_id at each RS threshold boundary */
+ uint16_t *rs_last_id;
uint16_t nb_tx_desc; /* number of TX descriptors */
uint16_t tx_tail; /* current value of tail register */
uint16_t nb_tx_used; /* number of TX desc used since RS bit set */
@@ -139,6 +141,8 @@ struct ci_tx_queue {
uint16_t tx_free_thresh;
/* Number of TX descriptors to use before RS bit is set. */
uint16_t tx_rs_thresh;
+ /* Scalar TX path: log2 of tx_rs_thresh for efficient bit operations */
+ uint8_t log2_rs_thresh;
uint16_t port_id; /* Device port identifier. */
uint16_t queue_id; /* TX queue index. */
uint16_t reg_idx;
diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
index 55502b46ed..3d0a23eda3 100644
--- a/drivers/net/intel/common/tx_scalar_fns.h
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -23,37 +23,24 @@
static __rte_always_inline int
ci_tx_xmit_cleanup(struct ci_tx_queue *txq)
{
- struct ci_tx_entry *sw_ring = txq->sw_ring;
volatile struct ci_tx_desc *txd = txq->ci_tx_ring;
- uint16_t last_desc_cleaned = txq->last_desc_cleaned;
- uint16_t nb_tx_desc = txq->nb_tx_desc;
- uint16_t desc_to_clean_to;
- uint16_t nb_tx_to_clean;
+ const uint16_t last_desc_cleaned = txq->last_desc_cleaned;
+ const uint16_t nb_tx_desc = txq->nb_tx_desc;
- /* Determine the last descriptor needing to be cleaned */
- desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
- if (desc_to_clean_to >= nb_tx_desc)
- desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
-
- /* Check to make sure the last descriptor to clean is done */
- desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
+ const uint16_t rs_idx = (last_desc_cleaned == nb_tx_desc - 1) ?
+ 0 :
+ (last_desc_cleaned + 1) >> txq->log2_rs_thresh;
+ uint16_t desc_to_clean_to = (rs_idx << txq->log2_rs_thresh) + (txq->tx_rs_thresh - 1);
/* Check if descriptor is done - all drivers use 0xF as done value in bits 3:0 */
- if ((txd[desc_to_clean_to].cmd_type_offset_bsz & rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
- rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE)) {
+ if ((txd[txq->rs_last_id[rs_idx]].cmd_type_offset_bsz & rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
+ rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE))
/* Descriptor not yet processed by hardware */
return -1;
- }
-
- /* Figure out how many descriptors will be cleaned */
- if (last_desc_cleaned > desc_to_clean_to)
- nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) + desc_to_clean_to);
- else
- nb_tx_to_clean = (uint16_t)(desc_to_clean_to - last_desc_cleaned);
/* Update the txq to reflect the last descriptor that was cleaned */
txq->last_desc_cleaned = desc_to_clean_to;
- txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
+ txq->nb_tx_free += txq->tx_rs_thresh;
return 0;
}
@@ -232,6 +219,7 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
uint16_t nb_ipsec = 0;
uint64_t ipsec_qw0 = 0, ipsec_qw1 = 0;
uint64_t cd_qw0, cd_qw1;
+ uint16_t pkt_rs_idx;
tx_pkt = *tx_pkts++;
td_cmd = CI_TX_DESC_CMD_ICRC;
@@ -272,6 +260,9 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
if (tx_last >= txq->nb_tx_desc)
tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
+ /* Track the RS threshold bucket at packet start */
+ pkt_rs_idx = (uint16_t)(tx_id >> txq->log2_rs_thresh);
+
if (unlikely(nb_used > txq->nb_tx_free)) {
if (ci_tx_xmit_cleanup(txq) != 0) {
if (nb_tx == 0)
@@ -311,8 +302,7 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
if (txe->mbuf)
rte_pktmbuf_free_seg(txe->mbuf);
- *txe = (struct ci_tx_entry){ .mbuf = tx_pkt, .last_id = tx_last, .next_id = tx_id };
-
+ txe->mbuf = tx_pkt;
/* Setup TX Descriptor */
td_cmd |= CI_TX_DESC_CMD_EOP;
const uint64_t cmd_type_offset_bsz = CI_TX_DESC_DTYPE_DATA |
@@ -339,7 +329,6 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
write_txd(ctx_txd, cd_qw0, cd_qw1);
- txe->last_id = tx_last;
tx_id = txe->next_id;
txe = txn;
}
@@ -358,7 +347,6 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
ipsec_txd[0] = ipsec_qw0;
ipsec_txd[1] = ipsec_qw1;
- txe->last_id = tx_last;
tx_id = txe->next_id;
txe = txn;
}
@@ -394,7 +382,6 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
buf_dma_addr += CI_MAX_DATA_PER_TXD;
slen -= CI_MAX_DATA_PER_TXD;
- txe->last_id = tx_last;
tx_id = txe->next_id;
txe = txn;
txd = &ci_tx_ring[tx_id];
@@ -412,7 +399,6 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S);
write_txd(txd, buf_dma_addr, cmd_type_offset_bsz);
- txe->last_id = tx_last;
tx_id = txe->next_id;
txe = txn;
m_seg = m_seg->next;
@@ -421,13 +407,22 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
- /* set RS bit on the last descriptor of one packet */
- if (txq->nb_tx_used >= txq->tx_rs_thresh) {
+ /* Check if packet crosses into a new RS threshold bucket.
+ * The RS bit is set on the last descriptor when we move from one bucket to another.
+ * For example, with tx_rs_thresh=32 and a 5-descriptor packet using slots 30-34:
+ * - pkt_rs_idx = 30 >> 5 = 0 (started in bucket 0)
+ * - tx_last = 34, so 35 >> 5 = 1 (next packet is in bucket 1)
+ * - Since 0 != 1, set RS bit on descriptor 34, and record rs_last_id[0] = 34
+ */
+ uint16_t next_rs_idx = ((tx_last + 1) >> txq->log2_rs_thresh);
+
+ if (next_rs_idx != pkt_rs_idx) {
+ /* Packet crossed into a new bucket - set RS bit on last descriptor */
txd->cmd_type_offset_bsz |=
rte_cpu_to_le_64(CI_TX_DESC_CMD_RS << CI_TXD_QW1_CMD_S);
- /* Update txq RS bit counters */
- txq->nb_tx_used = 0;
+ /* Record the last descriptor ID for the bucket we're leaving */
+ txq->rs_last_id[pkt_rs_idx] = tx_last;
}
if (ts_fns != NULL)
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index aef78c5358..1fadd0407a 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -24,6 +24,7 @@
#include <rte_ip.h>
#include <rte_net.h>
#include <rte_vect.h>
+#include <rte_bitops.h>
#include "i40e_logs.h"
#include "base/i40e_prototype.h"
@@ -2269,6 +2270,13 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
(int)queue_idx);
return I40E_ERR_PARAM;
}
+ if (!rte_is_power_of_2(tx_rs_thresh)) {
+ PMD_INIT_LOG(ERR, "tx_rs_thresh must be a power of 2. (tx_rs_thresh=%u port=%d queue=%d)",
+ (unsigned int)tx_rs_thresh,
+ (int)dev->data->port_id,
+ (int)queue_idx);
+ return I40E_ERR_PARAM;
+ }
if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
"tx_rs_thresh is greater than 1. "
@@ -2310,6 +2318,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
txq->mz = tz;
txq->nb_tx_desc = nb_desc;
txq->tx_rs_thresh = tx_rs_thresh;
+ txq->log2_rs_thresh = rte_log2_u32(tx_rs_thresh);
txq->tx_free_thresh = tx_free_thresh;
txq->queue_id = queue_idx;
txq->reg_idx = reg_idx;
@@ -2333,6 +2342,16 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
return -ENOMEM;
}
+ /* Allocate RS last_id tracking array */
+ uint16_t num_rs_buckets = nb_desc / tx_rs_thresh;
+ txq->rs_last_id = rte_zmalloc_socket(NULL, sizeof(txq->rs_last_id[0]) * num_rs_buckets,
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (txq->rs_last_id == NULL) {
+ i40e_tx_queue_release(txq);
+ PMD_DRV_LOG(ERR, "Failed to allocate memory for RS last_id array");
+ return -ENOMEM;
+ }
+
i40e_reset_tx_queue(txq);
txq->q_set = TRUE;
@@ -2378,6 +2397,7 @@ i40e_tx_queue_release(void *txq)
ci_txq_release_all_mbufs(q, false);
rte_free(q->sw_ring);
+ rte_free(q->rs_last_id);
rte_memzone_free(q->mz);
rte_free(q);
}
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index f96876ca46..4517d55011 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -25,6 +25,7 @@
#include <rte_ip.h>
#include <rte_net.h>
#include <rte_vect.h>
+#include <rte_bitops.h>
#include <rte_vxlan.h>
#include <rte_gtp.h>
#include <rte_geneve.h>
@@ -204,6 +205,11 @@ check_tx_thresh(uint16_t nb_desc, uint16_t tx_rs_thresh,
tx_rs_thresh, nb_desc);
return -EINVAL;
}
+ if (!rte_is_power_of_2(tx_rs_thresh)) {
+ PMD_INIT_LOG(ERR, "tx_rs_thresh must be a power of 2. (tx_rs_thresh=%u)",
+ tx_rs_thresh);
+ return -EINVAL;
+ }
return 0;
}
@@ -804,6 +810,7 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
txq->nb_tx_desc = nb_desc;
txq->tx_rs_thresh = tx_rs_thresh;
+ txq->log2_rs_thresh = rte_log2_u32(tx_rs_thresh);
txq->tx_free_thresh = tx_free_thresh;
txq->queue_id = queue_idx;
txq->port_id = dev->data->port_id;
@@ -827,6 +834,17 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
return -ENOMEM;
}
+ /* Allocate RS last_id tracking array */
+ uint16_t num_rs_buckets = nb_desc / tx_rs_thresh;
+ txq->rs_last_id = rte_zmalloc_socket(NULL, sizeof(txq->rs_last_id[0]) * num_rs_buckets,
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (txq->rs_last_id == NULL) {
+ PMD_INIT_LOG(ERR, "Failed to allocate memory for RS last_id array");
+ rte_free(txq->sw_ring);
+ rte_free(txq);
+ return -ENOMEM;
+ }
+
/* Allocate TX hardware ring descriptors. */
ring_size = sizeof(struct ci_tx_desc) * IAVF_MAX_RING_DESC;
ring_size = RTE_ALIGN(ring_size, IAVF_DMA_MEM_ALIGN);
@@ -1051,6 +1069,7 @@ iavf_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
ci_txq_release_all_mbufs(q, q->use_ctx);
rte_free(q->sw_ring);
+ rte_free(q->rs_last_id);
rte_memzone_free(q->mz);
rte_free(q);
}
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 2c73011181..a6a454ddf5 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -5,6 +5,7 @@
#include <ethdev_driver.h>
#include <rte_net.h>
#include <rte_vect.h>
+#include <rte_bitops.h>
#include "ice_rxtx.h"
#include "ice_rxtx_vec_common.h"
@@ -1576,6 +1577,13 @@ ice_tx_queue_setup(struct rte_eth_dev *dev,
(int)queue_idx);
return -EINVAL;
}
+ if (!rte_is_power_of_2(tx_rs_thresh)) {
+ PMD_INIT_LOG(ERR, "tx_rs_thresh must be a power of 2. (tx_rs_thresh=%u port=%d queue=%d)",
+ (unsigned int)tx_rs_thresh,
+ (int)dev->data->port_id,
+ (int)queue_idx);
+ return -EINVAL;
+ }
if (tx_rs_thresh > 1 && tx_conf->tx_thresh.wthresh != 0) {
PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
"tx_rs_thresh is greater than 1. "
@@ -1618,6 +1626,7 @@ ice_tx_queue_setup(struct rte_eth_dev *dev,
txq->mz = tz;
txq->nb_tx_desc = nb_desc;
txq->tx_rs_thresh = tx_rs_thresh;
+ txq->log2_rs_thresh = rte_log2_u32(tx_rs_thresh);
txq->tx_free_thresh = tx_free_thresh;
txq->queue_id = queue_idx;
@@ -1642,6 +1651,16 @@ ice_tx_queue_setup(struct rte_eth_dev *dev,
return -ENOMEM;
}
+ /* Allocate RS last_id tracking array */
+ uint16_t num_rs_buckets = nb_desc / tx_rs_thresh;
+ txq->rs_last_id = rte_zmalloc_socket(NULL, sizeof(txq->rs_last_id[0]) * num_rs_buckets,
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (txq->rs_last_id == NULL) {
+ ice_tx_queue_release(txq);
+ PMD_INIT_LOG(ERR, "Failed to allocate memory for RS last_id array");
+ return -ENOMEM;
+ }
+
if (vsi->type == ICE_VSI_PF && (offloads & RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP)) {
if (hw->phy_model != ICE_PHY_E830) {
ice_tx_queue_release(txq);
@@ -1714,6 +1733,7 @@ ice_tx_queue_release(void *txq)
ci_txq_release_all_mbufs(q, false);
rte_free(q->sw_ring);
+ rte_free(q->rs_last_id);
if (q->tsq) {
rte_memzone_free(q->tsq->ts_mz);
rte_free(q->tsq);
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.c b/drivers/net/intel/idpf/idpf_common_rxtx.c
index 81bc45f6ef..1d123f6350 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.c
@@ -5,6 +5,7 @@
#include <eal_export.h>
#include <rte_mbuf_dyn.h>
#include <rte_errno.h>
+#include <rte_bitops.h>
#include "idpf_common_rxtx.h"
#include "idpf_common_device.h"
@@ -73,6 +74,11 @@ idpf_qc_tx_thresh_check(uint16_t nb_desc, uint16_t tx_rs_thresh,
tx_rs_thresh, nb_desc);
return -EINVAL;
}
+ if (!rte_is_power_of_2(tx_rs_thresh)) {
+ DRV_LOG(ERR, "tx_rs_thresh must be a power of 2. (tx_rs_thresh=%u)",
+ tx_rs_thresh);
+ return -EINVAL;
+ }
return 0;
}
@@ -333,6 +339,7 @@ idpf_qc_tx_queue_release(void *txq)
}
ci_txq_release_all_mbufs(q, false);
+ rte_free(q->rs_last_id);
rte_free(q->sw_ring);
rte_memzone_free(q->mz);
rte_free(q);
diff --git a/drivers/net/intel/idpf/idpf_rxtx.c b/drivers/net/intel/idpf/idpf_rxtx.c
index e974eb44b0..5c2516f556 100644
--- a/drivers/net/intel/idpf/idpf_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_rxtx.c
@@ -437,6 +437,7 @@ idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
txq->nb_tx_desc = nb_desc;
txq->tx_rs_thresh = tx_rs_thresh;
+ txq->log2_rs_thresh = rte_log2_u32(tx_rs_thresh);
txq->tx_free_thresh = tx_free_thresh;
txq->queue_id = vport->chunks_info.tx_start_qid + queue_idx;
txq->port_id = dev->data->port_id;
@@ -468,6 +469,15 @@ idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
goto err_sw_ring_alloc;
}
+ txq->rs_last_id = rte_zmalloc_socket("idpf tx rs_last_id",
+ sizeof(txq->rs_last_id[0]) * (nb_desc >> txq->log2_rs_thresh),
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (txq->rs_last_id == NULL) {
+ PMD_INIT_LOG(ERR, "Failed to allocate memory for TX RS tracking");
+ ret = -ENOMEM;
+ goto err_rs_last_id_alloc;
+ }
+
if (!is_splitq) {
txq->ci_tx_ring = mz->addr;
idpf_qc_single_tx_queue_reset(txq);
@@ -490,6 +500,9 @@ idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
return 0;
err_complq_setup:
+ rte_free(txq->rs_last_id);
+err_rs_last_id_alloc:
+ rte_free(txq->sw_ring);
err_sw_ring_alloc:
idpf_dma_zone_release(mz);
err_mz_reserve:
--
2.51.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [RFC PATCH 25/27] net/ixgbe: use separate array for desc status tracking
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (23 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 24/27] net/intel: use separate array for desc status tracking Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 26/27] net/intel: drop unused Tx queue used count Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 27/27] net/intel: remove index for tracking end of packet Bruce Richardson
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev; +Cc: Bruce Richardson, Anatoly Burakov, Vladimir Medvedkin
Due to significant differences in the ixgbe transmit descriptors, the
ixgbe driver does not use the common scalar Tx functionality. Update the
driver directly so its use of the rs_last_id array matches that of the
common Tx code.
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/ixgbe/ixgbe_rxtx.c | 86 +++++++++++++++-------------
1 file changed, 47 insertions(+), 39 deletions(-)
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index a7583c178a..3eeec220fd 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -43,6 +43,7 @@
#include <rte_ip.h>
#include <rte_net.h>
#include <rte_vect.h>
+#include <rte_bitops.h>
#include "ixgbe_logs.h"
#include "base/ixgbe_api.h"
@@ -571,57 +572,35 @@ tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
static inline int
ixgbe_xmit_cleanup(struct ci_tx_queue *txq)
{
- struct ci_tx_entry *sw_ring = txq->sw_ring;
volatile union ixgbe_adv_tx_desc *txr = txq->ixgbe_tx_ring;
- uint16_t last_desc_cleaned = txq->last_desc_cleaned;
- uint16_t nb_tx_desc = txq->nb_tx_desc;
- uint16_t desc_to_clean_to;
- uint16_t nb_tx_to_clean;
- uint32_t status;
+ const uint16_t last_desc_cleaned = txq->last_desc_cleaned;
+ const uint16_t nb_tx_desc = txq->nb_tx_desc;
- /* Determine the last descriptor needing to be cleaned */
- desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
- if (desc_to_clean_to >= nb_tx_desc)
- desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
+ const uint16_t rs_idx = (last_desc_cleaned == nb_tx_desc - 1) ?
+ 0 :
+ (last_desc_cleaned + 1) >> txq->log2_rs_thresh;
+ uint16_t desc_to_clean_to = (rs_idx << txq->log2_rs_thresh) + (txq->tx_rs_thresh - 1);
- /* Check to make sure the last descriptor to clean is done */
- desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
- status = txr[desc_to_clean_to].wb.status;
+ uint32_t status = txr[txq->rs_last_id[rs_idx]].wb.status;
if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
PMD_TX_LOG(DEBUG,
"TX descriptor %4u is not done"
"(port=%d queue=%d)",
- desc_to_clean_to,
+ txq->rs_last_id[rs_idx],
txq->port_id, txq->queue_id);
/* Failed to clean any descriptors, better luck next time */
return -(1);
}
- /* Figure out how many descriptors will be cleaned */
- if (last_desc_cleaned > desc_to_clean_to)
- nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
- desc_to_clean_to);
- else
- nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
- last_desc_cleaned);
-
PMD_TX_LOG(DEBUG,
"Cleaning %4u TX descriptors: %4u to %4u "
"(port=%d queue=%d)",
- nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
+ txq->tx_rs_thresh, last_desc_cleaned, desc_to_clean_to,
txq->port_id, txq->queue_id);
- /*
- * The last descriptor to clean is done, so that means all the
- * descriptors from the last descriptor that was cleaned
- * up to the last descriptor with the RS bit set
- * are done. Only reset the threshold descriptor.
- */
- txr[desc_to_clean_to].wb.status = 0;
-
/* Update the txq to reflect the last descriptor that was cleaned */
txq->last_desc_cleaned = desc_to_clean_to;
- txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
+ txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
/* No Error */
return 0;
@@ -749,6 +728,9 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
if (tx_last >= txq->nb_tx_desc)
tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
+ /* Track the RS threshold bucket at packet start */
+ uint16_t pkt_rs_idx = (uint16_t)(tx_id >> txq->log2_rs_thresh);
+
PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
" tx_first=%u tx_last=%u",
(unsigned) txq->port_id,
@@ -876,7 +858,6 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_offload,
rte_security_dynfield(tx_pkt));
- txe->last_id = tx_last;
tx_id = txe->next_id;
txe = txn;
}
@@ -922,7 +903,6 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
rte_cpu_to_le_32(cmd_type_len | slen);
txd->read.olinfo_status =
rte_cpu_to_le_32(olinfo_status);
- txe->last_id = tx_last;
tx_id = txe->next_id;
txe = txn;
m_seg = m_seg->next;
@@ -935,8 +915,18 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
- /* Set RS bit only on threshold packets' last descriptor */
- if (txq->nb_tx_used >= txq->tx_rs_thresh) {
+ /*
+ * Check if packet crosses into a new RS threshold bucket.
+ * The RS bit is set on the last descriptor when we move from one bucket to another.
+ * For example, with tx_rs_thresh=32 and a 5-descriptor packet using slots 30-34:
+ * - pkt_rs_idx = 30 >> 5 = 0 (started in bucket 0)
+ * - tx_last = 34, so 35 >> 5 = 1 (next packet is in bucket 1)
+ * - Since 0 != 1, set RS bit on descriptor 34, and record rs_last_id[0] = 34
+ */
+ uint16_t next_rs_idx = ((tx_last + 1) >> txq->log2_rs_thresh);
+
+ if (next_rs_idx != pkt_rs_idx) {
+ /* Packet crossed into a new bucket - set RS bit on last descriptor */
PMD_TX_LOG(DEBUG,
"Setting RS bit on TXD id="
"%4u (port=%d queue=%d)",
@@ -944,9 +934,8 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
cmd_type_len |= IXGBE_TXD_CMD_RS;
- /* Update txq RS bit counters */
- txq->nb_tx_used = 0;
- txp = NULL;
+ /* Record the last descriptor ID for the bucket we're leaving */
+ txq->rs_last_id[pkt_rs_idx] = tx_last;
} else
txp = txd;
@@ -2521,6 +2510,7 @@ ixgbe_tx_queue_release(struct ci_tx_queue *txq)
if (txq != NULL && txq->ops != NULL) {
ci_txq_release_all_mbufs(txq, false);
txq->ops->free_swring(txq);
+ rte_free(txq->rs_last_id);
rte_memzone_free(txq->mz);
rte_free(txq);
}
@@ -2825,6 +2815,13 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
(int)dev->data->port_id, (int)queue_idx);
return -(EINVAL);
}
+ if (!rte_is_power_of_2(tx_rs_thresh)) {
+ PMD_INIT_LOG(ERR, "tx_rs_thresh must be a power of 2. (tx_rs_thresh=%u port=%d queue=%d)",
+ (unsigned int)tx_rs_thresh,
+ (int)dev->data->port_id,
+ (int)queue_idx);
+ return -(EINVAL);
+ }
/*
* If rs_bit_thresh is greater than 1, then TX WTHRESH should be
@@ -2870,6 +2867,7 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
txq->mz = tz;
txq->nb_tx_desc = nb_desc;
txq->tx_rs_thresh = tx_rs_thresh;
+ txq->log2_rs_thresh = rte_log2_u32(tx_rs_thresh);
txq->tx_free_thresh = tx_free_thresh;
txq->pthresh = tx_conf->tx_thresh.pthresh;
txq->hthresh = tx_conf->tx_thresh.hthresh;
@@ -2911,6 +2909,16 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
txq->sw_ring, txq->ixgbe_tx_ring, txq->tx_ring_dma);
+ /* Allocate RS last_id tracking array */
+ uint16_t num_rs_buckets = nb_desc / tx_rs_thresh;
+ txq->rs_last_id = rte_zmalloc_socket(NULL, sizeof(txq->rs_last_id[0]) * num_rs_buckets,
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (txq->rs_last_id == NULL) {
+ ixgbe_tx_queue_release(txq);
+ PMD_DRV_LOG(ERR, "Failed to allocate memory for RS last_id array");
+ return -ENOMEM;
+ }
+
/* set up vector or scalar TX function as appropriate */
ixgbe_set_tx_function(dev, txq);
--
2.51.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [RFC PATCH 26/27] net/intel: drop unused Tx queue used count
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (24 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 25/27] net/ixgbe: " Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 27/27] net/intel: remove index for tracking end of packet Bruce Richardson
26 siblings, 0 replies; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev
Cc: Bruce Richardson, Vladimir Medvedkin, Anatoly Burakov,
Jingjing Wu, Praveen Shetty
Since drivers now track the setting of the RS bit based on fixed
thresholds rather than after a fixed number of descriptors, we no longer
need to track the number of descriptors used from one call to another.
Therefore we can remove the nb_tx_used value from the Tx queue structure.
This value was still being used inside the IDPF splitq scalar code;
however, the idpf driver-specific section of the Tx queue structure also
has an rs_compl_count value that was only used by the vector code
paths, so we can use it to replace the old nb_tx_used value in the
scalar path.
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx.h | 1 -
drivers/net/intel/common/tx_scalar_fns.h | 1 -
drivers/net/intel/i40e/i40e_rxtx.c | 1 -
drivers/net/intel/iavf/iavf_rxtx.c | 1 -
drivers/net/intel/ice/ice_dcf_ethdev.c | 1 -
drivers/net/intel/ice/ice_rxtx.c | 1 -
drivers/net/intel/idpf/idpf_common_rxtx.c | 8 +++-----
drivers/net/intel/ixgbe/ixgbe_rxtx.c | 8 --------
drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.c | 1 -
9 files changed, 3 insertions(+), 20 deletions(-)
diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index 9b3f8385e6..3976766f06 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -130,7 +130,6 @@ struct ci_tx_queue {
uint16_t *rs_last_id;
uint16_t nb_tx_desc; /* number of TX descriptors */
uint16_t tx_tail; /* current value of tail register */
- uint16_t nb_tx_used; /* number of TX desc used since RS bit set */
/* index to last TX descriptor to have been cleaned */
uint16_t last_desc_cleaned;
/* Total number of TX descriptors ready to be allocated. */
diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
index 3d0a23eda3..27a5dafefc 100644
--- a/drivers/net/intel/common/tx_scalar_fns.h
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -404,7 +404,6 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
m_seg = m_seg->next;
} while (m_seg);
end_pkt:
- txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
/* Check if packet crosses into a new RS threshold bucket.
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index 1fadd0407a..e1226d649b 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -2632,7 +2632,6 @@ i40e_reset_tx_queue(struct ci_tx_queue *txq)
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
txq->tx_tail = 0;
- txq->nb_tx_used = 0;
txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index 4517d55011..9cac6e8841 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -298,7 +298,6 @@ reset_tx_queue(struct ci_tx_queue *txq)
}
txq->tx_tail = 0;
- txq->nb_tx_used = 0;
txq->last_desc_cleaned = txq->nb_tx_desc - 1;
txq->nb_tx_free = txq->nb_tx_desc - 1;
diff --git a/drivers/net/intel/ice/ice_dcf_ethdev.c b/drivers/net/intel/ice/ice_dcf_ethdev.c
index 4ceecc15c6..02a23629d6 100644
--- a/drivers/net/intel/ice/ice_dcf_ethdev.c
+++ b/drivers/net/intel/ice/ice_dcf_ethdev.c
@@ -414,7 +414,6 @@ reset_tx_queue(struct ci_tx_queue *txq)
}
txq->tx_tail = 0;
- txq->nb_tx_used = 0;
txq->last_desc_cleaned = txq->nb_tx_desc - 1;
txq->nb_tx_free = txq->nb_tx_desc - 1;
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index a6a454ddf5..092981f452 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -1127,7 +1127,6 @@ ice_reset_tx_queue(struct ci_tx_queue *txq)
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
txq->tx_tail = 0;
- txq->nb_tx_used = 0;
txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.c b/drivers/net/intel/idpf/idpf_common_rxtx.c
index 1d123f6350..b36e29c8d2 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.c
@@ -224,7 +224,6 @@ idpf_qc_split_tx_descq_reset(struct ci_tx_queue *txq)
}
txq->tx_tail = 0;
- txq->nb_tx_used = 0;
/* Use this as next to clean for split desc queue */
txq->last_desc_cleaned = 0;
@@ -284,7 +283,6 @@ idpf_qc_single_tx_queue_reset(struct ci_tx_queue *txq)
}
txq->tx_tail = 0;
- txq->nb_tx_used = 0;
txq->last_desc_cleaned = txq->nb_tx_desc - 1;
txq->nb_tx_free = txq->nb_tx_desc - 1;
@@ -993,12 +991,12 @@ idpf_dp_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_EOP;
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
- txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
+ txq->rs_compl_count += nb_used;
- if (txq->nb_tx_used >= 32) {
+ if (txq->rs_compl_count >= 32) {
txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_RE;
/* Update txq RE bit counters */
- txq->nb_tx_used = 0;
+ txq->rs_compl_count = 0;
}
}
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index 3eeec220fd..6b8ff20f61 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -708,12 +708,6 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
*/
nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
- if (txp != NULL &&
- nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
- /* set RS on the previous packet in the burst */
- txp->read.cmd_type_len |=
- rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
-
/*
* The number of descriptors that must be allocated for a
* packet is the number of segments of that packet, plus 1
@@ -912,7 +906,6 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
* The last packet data descriptor needs End Of Packet (EOP)
*/
cmd_type_len |= IXGBE_TXD_CMD_EOP;
- txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
/*
@@ -2551,7 +2544,6 @@ ixgbe_reset_tx_queue(struct ci_tx_queue *txq)
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
txq->tx_tail = 0;
- txq->nb_tx_used = 0;
/*
* Always allow 1 descriptor to be un-allocated to avoid
* a H/W race condition
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.c b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.c
index eb7c79eaf9..63c7cb50d3 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.c
@@ -47,7 +47,6 @@ ixgbe_reset_tx_queue_vec(struct ci_tx_queue *txq)
txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
txq->tx_tail = 0;
- txq->nb_tx_used = 0;
/*
* Always allow 1 descriptor to be un-allocated to avoid
* a H/W race condition
--
2.51.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [RFC PATCH 27/27] net/intel: remove index for tracking end of packet
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
` (25 preceding siblings ...)
2025-12-19 17:25 ` [RFC PATCH 26/27] net/intel: drop unused Tx queue used count Bruce Richardson
@ 2025-12-19 17:25 ` Bruce Richardson
2025-12-20 9:05 ` Morten Brørup
26 siblings, 1 reply; 30+ messages in thread
From: Bruce Richardson @ 2025-12-19 17:25 UTC (permalink / raw)
To: dev
Cc: Bruce Richardson, Vladimir Medvedkin, Anatoly Burakov,
Jingjing Wu, Praveen Shetty
The last_id value in each tx_sw_queue entry is no longer used in the
datapath, so remove it and its initialization. In the functions releasing
packets back, rather than relying on "last_id" to identify the end of a
packet, instead check for the mbuf's next pointer being NULL.
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
drivers/net/intel/common/tx.h | 1 -
drivers/net/intel/i40e/i40e_rxtx.c | 8 +++-----
drivers/net/intel/iavf/iavf_rxtx.c | 9 ++++-----
drivers/net/intel/ice/ice_dcf_ethdev.c | 1 -
drivers/net/intel/ice/ice_rxtx.c | 9 ++++-----
drivers/net/intel/idpf/idpf_common_rxtx.c | 2 --
drivers/net/intel/ixgbe/ixgbe_rxtx.c | 9 ++++-----
7 files changed, 15 insertions(+), 24 deletions(-)
diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index 3976766f06..2d3626cbda 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -104,7 +104,6 @@ struct ci_tx_queue;
struct ci_tx_entry {
struct rte_mbuf *mbuf; /* mbuf associated with TX desc, if any. */
uint16_t next_id; /* Index of next descriptor in ring. */
- uint16_t last_id; /* Index of last scattered descriptor. */
};
/**
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index e1226d649b..9aa31d6168 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -2523,14 +2523,13 @@ i40e_tx_done_cleanup_full(struct ci_tx_queue *txq,
pkt_cnt < free_cnt &&
tx_id != tx_last; i++) {
if (swr_ring[tx_id].mbuf != NULL) {
- rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
- swr_ring[tx_id].mbuf = NULL;
-
/*
* last segment in the packet,
* increment packet count
*/
- pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
+ pkt_cnt += (swr_ring[tx_id].mbuf->next == NULL) ? 1 : 0;
+ rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
+ swr_ring[tx_id].mbuf = NULL;
}
tx_id = swr_ring[tx_id].next_id;
@@ -2623,7 +2622,6 @@ i40e_reset_tx_queue(struct ci_tx_queue *txq)
txd->cmd_type_offset_bsz =
rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE);
txe[i].mbuf = NULL;
- txe[i].last_id = i;
txe[prev].next_id = i;
prev = i;
}
diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c
index 9cac6e8841..558ae2598f 100644
--- a/drivers/net/intel/iavf/iavf_rxtx.c
+++ b/drivers/net/intel/iavf/iavf_rxtx.c
@@ -292,7 +292,6 @@ reset_tx_queue(struct ci_tx_queue *txq)
txq->ci_tx_ring[i].cmd_type_offset_bsz =
rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE);
txe[i].mbuf = NULL;
- txe[i].last_id = i;
txe[prev].next_id = i;
prev = i;
}
@@ -4002,14 +4001,14 @@ iavf_tx_done_cleanup_full(struct ci_tx_queue *txq,
while (pkt_cnt < free_cnt) {
do {
if (swr_ring[tx_id].mbuf != NULL) {
- rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
- swr_ring[tx_id].mbuf = NULL;
-
/*
* last segment in the packet,
* increment packet count
*/
- pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
+ pkt_cnt += (swr_ring[tx_id].mbuf->next == NULL) ? 1 : 0;
+ rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
+ swr_ring[tx_id].mbuf = NULL;
+
}
tx_id = swr_ring[tx_id].next_id;
diff --git a/drivers/net/intel/ice/ice_dcf_ethdev.c b/drivers/net/intel/ice/ice_dcf_ethdev.c
index 02a23629d6..abd7875e7b 100644
--- a/drivers/net/intel/ice/ice_dcf_ethdev.c
+++ b/drivers/net/intel/ice/ice_dcf_ethdev.c
@@ -408,7 +408,6 @@ reset_tx_queue(struct ci_tx_queue *txq)
txq->ci_tx_ring[i].cmd_type_offset_bsz =
rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE);
txe[i].mbuf = NULL;
- txe[i].last_id = i;
txe[prev].next_id = i;
prev = i;
}
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 092981f452..d11d9054f2 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -1118,7 +1118,6 @@ ice_reset_tx_queue(struct ci_tx_queue *txq)
txd->cmd_type_offset_bsz =
rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE);
txe[i].mbuf = NULL;
- txe[i].last_id = i;
txe[prev].next_id = i;
prev = i;
}
@@ -3190,14 +3189,14 @@ ice_tx_done_cleanup_full(struct ci_tx_queue *txq,
pkt_cnt < free_cnt &&
tx_id != tx_last; i++) {
if (swr_ring[tx_id].mbuf != NULL) {
- rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
- swr_ring[tx_id].mbuf = NULL;
-
/*
* last segment in the packet,
* increment packet count
*/
- pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
+ pkt_cnt += swr_ring[tx_id].mbuf->next == NULL ? 1 : 0;
+ rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
+ swr_ring[tx_id].mbuf = NULL;
+
}
tx_id = swr_ring[tx_id].next_id;
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.c b/drivers/net/intel/idpf/idpf_common_rxtx.c
index b36e29c8d2..781310e564 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.c
@@ -218,7 +218,6 @@ idpf_qc_split_tx_descq_reset(struct ci_tx_queue *txq)
prev = (uint16_t)(txq->sw_nb_desc - 1);
for (i = 0; i < txq->sw_nb_desc; i++) {
txe[i].mbuf = NULL;
- txe[i].last_id = i;
txe[prev].next_id = i;
prev = i;
}
@@ -277,7 +276,6 @@ idpf_qc_single_tx_queue_reset(struct ci_tx_queue *txq)
txq->ci_tx_ring[i].cmd_type_offset_bsz =
rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE);
txe[i].mbuf = NULL;
- txe[i].last_id = i;
txe[prev].next_id = i;
prev = i;
}
diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index 6b8ff20f61..5f4bee4f2f 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -2407,14 +2407,14 @@ ixgbe_tx_done_cleanup_full(struct ci_tx_queue *txq, uint32_t free_cnt)
pkt_cnt < free_cnt &&
tx_id != tx_last; i++) {
if (swr_ring[tx_id].mbuf != NULL) {
- rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
- swr_ring[tx_id].mbuf = NULL;
-
/*
* last segment in the packet,
* increment packet count
*/
- pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
+ pkt_cnt += swr_ring[tx_id].mbuf->next == NULL ? 1 : 0;
+ rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
+ swr_ring[tx_id].mbuf = NULL;
+
}
tx_id = swr_ring[tx_id].next_id;
@@ -2535,7 +2535,6 @@ ixgbe_reset_tx_queue(struct ci_tx_queue *txq)
txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
txe[i].mbuf = NULL;
- txe[i].last_id = i;
txe[prev].next_id = i;
prev = i;
}
--
2.51.0
^ permalink raw reply	[flat|nested] 30+ messages in thread
* RE: [RFC PATCH 27/27] net/intel: remove index for tracking end of packet
2025-12-19 17:25 ` [RFC PATCH 27/27] net/intel: remove index for tracking end of packet Bruce Richardson
@ 2025-12-20 9:05 ` Morten Brørup
0 siblings, 0 replies; 30+ messages in thread
From: Morten Brørup @ 2025-12-20 9:05 UTC (permalink / raw)
To: Bruce Richardson, dev
Cc: Vladimir Medvedkin, Anatoly Burakov, Jingjing Wu, Praveen Shetty
> From: Bruce Richardson [mailto:bruce.richardson@intel.com]
> Sent: Friday, 19 December 2025 18.26
>
> The last_id value in each tx_sw_queue entry is no longer used in the
> datapath, so remove it and its initialization. In the functions that
> release packets back to the mempool, rather than relying on "last_id"
> to identify the end of a packet, check for the mbuf's next pointer
> being NULL instead.
>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
[...]
> @@ -2523,14 +2523,13 @@ i40e_tx_done_cleanup_full(struct ci_tx_queue
> *txq,
> pkt_cnt < free_cnt &&
> tx_id != tx_last; i++) {
> if (swr_ring[tx_id].mbuf != NULL) {
> - rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
> - swr_ring[tx_id].mbuf = NULL;
> -
> /*
> * last segment in the packet,
> * increment packet count
> */
> - pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
> + pkt_cnt += (swr_ring[tx_id].mbuf->next == NULL)
> ? 1 : 0;
Note to reviewers:
Dereferencing the mbuf (instead of checking last_id) does not add a potential cache miss, because rte_pktmbuf_free_seg() dereferences it anyway.
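(Rough sketch of the point only, not the driver code: the new ->next read
touches an mbuf that the free call has to dereference anyway. Names follow
the hunk quoted here.)

	struct rte_mbuf *m = swr_ring[tx_id].mbuf;
	/* reading m->next loads the mbuf that ... */
	pkt_cnt += (m->next == NULL);
	/* ... rte_pktmbuf_free_seg() dereferences right afterwards, so
	 * checking m->next instead of a last_id field does not add a
	 * potential cache miss.
	 */
	rte_pktmbuf_free_seg(m);
	swr_ring[tx_id].mbuf = NULL;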
> + rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
> + swr_ring[tx_id].mbuf = NULL;
> }
>
> tx_id = swr_ring[tx_id].next_id;
^ permalink raw reply [flat|nested] 30+ messages in thread