From: Rahul Bhansali <rbhansali@marvell.com>
To: <dev@dpdk.org>, Pavan Nikhilesh <pbhagavatula@marvell.com>,
"Shijith Thotton" <sthotton@marvell.com>,
Nithin Dabilpuram <ndabilpuram@marvell.com>,
Kiran Kumar K <kirankumark@marvell.com>,
Sunil Kumar Kori <skori@marvell.com>,
Satha Rao <skoteshwar@marvell.com>,
Rakesh Kudurumalla <rkudurumalla@marvell.com>
Cc: <jerinj@marvell.com>, Rahul Bhansali <rbhansali@marvell.com>,
<stable@dpdk.org>
Subject: [PATCH v2] net/cnxk: performance improvement for SW mbuf free
Date: Fri, 1 Mar 2024 08:46:45 +0530
Message-ID: <20240301031645.1656237-1-rbhansali@marvell.com>
In-Reply-To: <20240227144247.1562499-1-rbhansali@marvell.com>
Performance improvement is done for the Tx fastpath flag MBUF_NOFF when
tx_compl_ena is false and the mbuf has an external buffer.
In such a case, instead of freeing each external mbuf individually before
the LMTST, the external mbufs are linked into a chain and all of them are
freed after the LMTST. This not only improves performance but also fixes
SQ corruption.
CN10k performance improvement is ~14%.
CN9k performance improvement is ~20%.
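For reference, the deferred free amounts to the minimal sketch below
(illustrative only; defer_extmbuf() and flush_extmbuf_chain() are
hypothetical names, the chain walk mirrors the cn10k_nix_free_extmbuf()
and cn9k_nix_free_extmbuf() helpers added by this patch):

    #include <rte_mbuf.h>

    /* Instead of rte_pktmbuf_free_seg(m) before the LMTST, link the
     * external mbuf onto a per-burst chain headed by *extm.
     */
    static inline void
    defer_extmbuf(struct rte_mbuf *m, struct rte_mbuf **extm)
    {
            m->next = *extm;
            *extm = m;
    }

    /* After the rte_io_wmb() that follows the LMTST, walk the chain
     * and free every deferred external mbuf in one pass.
     */
    static inline void
    flush_extmbuf_chain(struct rte_mbuf *m)
    {
            struct rte_mbuf *m_next;

            while (m != NULL) {
                    m_next = m->next;
                    rte_pktmbuf_free_seg(m);
                    m = m_next;
            }
    }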
Fixes: 51a636528515 ("net/cnxk: fix crash during Tx completion")
Cc: stable@dpdk.org
Signed-off-by: Rahul Bhansali <rbhansali@marvell.com>
---
Changes in v2: updated release_24_03.rst for SW mbuf free optimization
doc/guides/rel_notes/release_24_03.rst | 1 +
drivers/event/cnxk/cn10k_tx_worker.h | 8 ++-
drivers/event/cnxk/cn9k_worker.h | 9 ++-
drivers/net/cnxk/cn10k_tx.h | 97 ++++++++++++++++++--------
drivers/net/cnxk/cn9k_tx.h | 88 +++++++++++++++--------
5 files changed, 136 insertions(+), 67 deletions(-)
diff --git a/doc/guides/rel_notes/release_24_03.rst b/doc/guides/rel_notes/release_24_03.rst
index 8d440d56a5..39ffef11b0 100644
--- a/doc/guides/rel_notes/release_24_03.rst
+++ b/doc/guides/rel_notes/release_24_03.rst
@@ -111,6 +111,7 @@ New Features
* Added support for ``RTE_FLOW_ITEM_TYPE_PPPOES`` flow item.
* Added support for ``RTE_FLOW_ACTION_TYPE_SAMPLE`` flow item.
* Added support for Rx inject.
+ * Optimized SW external mbuf free for better performance and to avoid SQ corruption.
* **Updated Marvell OCTEON EP driver.**
diff --git a/drivers/event/cnxk/cn10k_tx_worker.h b/drivers/event/cnxk/cn10k_tx_worker.h
index 53e0dde20c..256237b895 100644
--- a/drivers/event/cnxk/cn10k_tx_worker.h
+++ b/drivers/event/cnxk/cn10k_tx_worker.h
@@ -70,6 +70,7 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
const uint64_t *txq_data, const uint32_t flags)
{
uint8_t lnum = 0, loff = 0, shft = 0;
+ struct rte_mbuf *extm = NULL;
struct cn10k_eth_txq *txq;
uintptr_t laddr;
uint16_t segdw;
@@ -90,7 +91,7 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
if (flags & NIX_TX_OFFLOAD_TSO_F)
cn10k_nix_xmit_prepare_tso(m, flags);
- cn10k_nix_xmit_prepare(txq, m, cmd, flags, txq->lso_tun_fmt, &sec,
+ cn10k_nix_xmit_prepare(txq, m, &extm, cmd, flags, txq->lso_tun_fmt, &sec,
txq->mark_flag, txq->mark_fmt);
laddr = lmt_addr;
@@ -105,7 +106,7 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
if (flags & NIX_TX_MULTI_SEG_F)
- segdw = cn10k_nix_prepare_mseg(txq, m, (uint64_t *)laddr, flags);
+ segdw = cn10k_nix_prepare_mseg(txq, m, &extm, (uint64_t *)laddr, flags);
else
segdw = cn10k_nix_tx_ext_subs(flags) + 2;
@@ -127,6 +128,9 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
/* Memory barrier to make sure lmtst store completes */
rte_io_wmb();
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
+ cn10k_nix_free_extmbuf(extm);
+
return 1;
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 0451157812..107265d54b 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -746,7 +746,7 @@ static __rte_always_inline uint16_t
cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
uint64_t *txq_data, const uint32_t flags)
{
- struct rte_mbuf *m = ev->mbuf;
+ struct rte_mbuf *m = ev->mbuf, *extm = NULL;
struct cn9k_eth_txq *txq;
/* Perform header writes before barrier for TSO */
@@ -767,7 +767,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
if (cn9k_sso_sq_depth(txq) <= 0)
return 0;
cn9k_nix_tx_skeleton(txq, cmd, flags, 0);
- cn9k_nix_xmit_prepare(txq, m, cmd, flags, txq->lso_tun_fmt, txq->mark_flag,
+ cn9k_nix_xmit_prepare(txq, m, &extm, cmd, flags, txq->lso_tun_fmt, txq->mark_flag,
txq->mark_fmt);
if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
@@ -789,7 +789,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
}
if (flags & NIX_TX_MULTI_SEG_F) {
- const uint16_t segdw = cn9k_nix_prepare_mseg(txq, m, cmd, flags);
+ const uint16_t segdw = cn9k_nix_prepare_mseg(txq, m, &extm, cmd, flags);
cn9k_nix_xmit_prepare_tstamp(txq, cmd, m->ol_flags, segdw,
flags);
if (!CNXK_TT_FROM_EVENT(ev->event)) {
@@ -819,6 +819,9 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
}
done:
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
+ cn9k_nix_free_extmbuf(extm);
+
return 1;
}
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 266c899a05..5c4b9e559e 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -733,8 +733,19 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
}
#endif
+static inline void
+cn10k_nix_free_extmbuf(struct rte_mbuf *m)
+{
+ struct rte_mbuf *m_next;
+ while (m != NULL) {
+ m_next = m->next;
+ rte_pktmbuf_free_seg(m);
+ m = m_next;
+ }
+}
+
static __rte_always_inline uint64_t
-cn10k_nix_prefree_seg(struct rte_mbuf *m, struct cn10k_eth_txq *txq,
+cn10k_nix_prefree_seg(struct rte_mbuf *m, struct rte_mbuf **extm, struct cn10k_eth_txq *txq,
struct nix_send_hdr_s *send_hdr, uint64_t *aura)
{
struct rte_mbuf *prev = NULL;
@@ -742,7 +753,8 @@ cn10k_nix_prefree_seg(struct rte_mbuf *m, struct cn10k_eth_txq *txq,
if (RTE_MBUF_HAS_EXTBUF(m)) {
if (unlikely(txq->tx_compl.ena == 0)) {
- rte_pktmbuf_free_seg(m);
+ m->next = *extm;
+ *extm = m;
return 1;
}
if (send_hdr->w0.pnc) {
@@ -766,7 +778,8 @@ cn10k_nix_prefree_seg(struct rte_mbuf *m, struct cn10k_eth_txq *txq,
#if defined(RTE_ARCH_ARM64)
/* Only called for first segments of single segmented mbufs */
static __rte_always_inline void
-cn10k_nix_prefree_seg_vec(struct rte_mbuf **mbufs, struct cn10k_eth_txq *txq,
+cn10k_nix_prefree_seg_vec(struct rte_mbuf **mbufs, struct rte_mbuf **extm,
+ struct cn10k_eth_txq *txq,
uint64x2_t *senddesc01_w0, uint64x2_t *senddesc23_w0,
uint64x2_t *senddesc01_w1, uint64x2_t *senddesc23_w1)
{
@@ -790,7 +803,8 @@ cn10k_nix_prefree_seg_vec(struct rte_mbuf **mbufs, struct cn10k_eth_txq *txq,
w1 = vgetq_lane_u64(*senddesc01_w1, 0);
w1 &= ~0xFFFF000000000000UL;
if (unlikely(!tx_compl_ena)) {
- rte_pktmbuf_free_seg(m0);
+ m0->next = *extm;
+ *extm = m0;
} else {
sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
rte_memory_order_relaxed);
@@ -820,7 +834,8 @@ cn10k_nix_prefree_seg_vec(struct rte_mbuf **mbufs, struct cn10k_eth_txq *txq,
w1 = vgetq_lane_u64(*senddesc01_w1, 1);
w1 &= ~0xFFFF000000000000UL;
if (unlikely(!tx_compl_ena)) {
- rte_pktmbuf_free_seg(m1);
+ m1->next = *extm;
+ *extm = m1;
} else {
sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
rte_memory_order_relaxed);
@@ -850,7 +865,8 @@ cn10k_nix_prefree_seg_vec(struct rte_mbuf **mbufs, struct cn10k_eth_txq *txq,
w1 = vgetq_lane_u64(*senddesc23_w1, 0);
w1 &= ~0xFFFF000000000000UL;
if (unlikely(!tx_compl_ena)) {
- rte_pktmbuf_free_seg(m2);
+ m2->next = *extm;
+ *extm = m2;
} else {
sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
rte_memory_order_relaxed);
@@ -880,7 +896,8 @@ cn10k_nix_prefree_seg_vec(struct rte_mbuf **mbufs, struct cn10k_eth_txq *txq,
w1 = vgetq_lane_u64(*senddesc23_w1, 1);
w1 &= ~0xFFFF000000000000UL;
if (unlikely(!tx_compl_ena)) {
- rte_pktmbuf_free_seg(m3);
+ m3->next = *extm;
+ *extm = m3;
} else {
sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
rte_memory_order_relaxed);
@@ -962,9 +979,9 @@ cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
static __rte_always_inline void
cn10k_nix_xmit_prepare(struct cn10k_eth_txq *txq,
- struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
- const uint64_t lso_tun_fmt, bool *sec, uint8_t mark_flag,
- uint64_t mark_fmt)
+ struct rte_mbuf *m, struct rte_mbuf **extm, uint64_t *cmd,
+ const uint16_t flags, const uint64_t lso_tun_fmt, bool *sec,
+ uint8_t mark_flag, uint64_t mark_fmt)
{
uint8_t mark_off = 0, mark_vlan = 0, markptr = 0;
struct nix_send_ext_s *send_hdr_ext;
@@ -1164,7 +1181,7 @@ cn10k_nix_xmit_prepare(struct cn10k_eth_txq *txq,
* DF bit = 0 otherwise
*/
aura = send_hdr->w0.aura;
- send_hdr->w0.df = cn10k_nix_prefree_seg(m, txq, send_hdr, &aura);
+ send_hdr->w0.df = cn10k_nix_prefree_seg(m, extm, txq, send_hdr, &aura);
send_hdr->w0.aura = aura;
}
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -1240,8 +1257,8 @@ cn10k_nix_xmit_prepare_tstamp(struct cn10k_eth_txq *txq, uintptr_t lmt_addr,
}
static __rte_always_inline uint16_t
-cn10k_nix_prepare_mseg(struct cn10k_eth_txq *txq,
- struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
+cn10k_nix_prepare_mseg(struct cn10k_eth_txq *txq, struct rte_mbuf *m, struct rte_mbuf **extm,
+ uint64_t *cmd, const uint16_t flags)
{
uint64_t prefree = 0, aura0, aura, nb_segs, segdw;
struct nix_send_hdr_s *send_hdr;
@@ -1284,7 +1301,7 @@ cn10k_nix_prepare_mseg(struct cn10k_eth_txq *txq,
/* Set invert df if buffer is not to be freed by H/W */
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
aura = send_hdr->w0.aura;
- prefree = cn10k_nix_prefree_seg(m, txq, send_hdr, &aura);
+ prefree = cn10k_nix_prefree_seg(m, extm, txq, send_hdr, &aura);
send_hdr->w0.aura = aura;
l_sg.i1 = prefree;
}
@@ -1331,7 +1348,7 @@ cn10k_nix_prepare_mseg(struct cn10k_eth_txq *txq,
cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
- prefree = cn10k_nix_prefree_seg(m, txq, send_hdr, &aura);
+ prefree = cn10k_nix_prefree_seg(m, extm, txq, send_hdr, &aura);
is_sg2 = aura != aura0 && !prefree;
}
@@ -1425,6 +1442,7 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
uint8_t lnum, c_lnum, c_shft, c_loff;
uintptr_t pa, lbase = txq->lmt_base;
uint16_t lmt_id, burst, left, i;
+ struct rte_mbuf *extm = NULL;
uintptr_t c_lbase = lbase;
uint64_t lso_tun_fmt = 0;
uint64_t mark_fmt = 0;
@@ -1479,7 +1497,7 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
if (flags & NIX_TX_OFFLOAD_TSO_F)
cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
- cn10k_nix_xmit_prepare(txq, tx_pkts[i], cmd, flags, lso_tun_fmt,
+ cn10k_nix_xmit_prepare(txq, tx_pkts[i], &extm, cmd, flags, lso_tun_fmt,
&sec, mark_flag, mark_fmt);
laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);
@@ -1554,6 +1572,11 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
}
rte_io_wmb();
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena) {
+ cn10k_nix_free_extmbuf(extm);
+ extm = NULL;
+ }
+
if (left)
goto again;
@@ -1569,6 +1592,7 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
uintptr_t pa0, pa1, lbase = txq->lmt_base;
const rte_iova_t io_addr = txq->io_addr;
uint16_t segdw, lmt_id, burst, left, i;
+ struct rte_mbuf *extm = NULL;
uint8_t lnum, c_lnum, c_loff;
uintptr_t c_lbase = lbase;
uint64_t lso_tun_fmt = 0;
@@ -1630,7 +1654,7 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
if (flags & NIX_TX_OFFLOAD_TSO_F)
cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
- cn10k_nix_xmit_prepare(txq, tx_pkts[i], cmd, flags, lso_tun_fmt,
+ cn10k_nix_xmit_prepare(txq, tx_pkts[i], &extm, cmd, flags, lso_tun_fmt,
&sec, mark_flag, mark_fmt);
laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);
@@ -1644,7 +1668,7 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
/* Move NIX desc to LMT/NIXTX area */
cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
/* Store sg list directly on lmt line */
- segdw = cn10k_nix_prepare_mseg(txq, tx_pkts[i], (uint64_t *)laddr,
+ segdw = cn10k_nix_prepare_mseg(txq, tx_pkts[i], &extm, (uint64_t *)laddr,
flags);
cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
segdw, flags);
@@ -1717,6 +1741,11 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
}
rte_io_wmb();
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena) {
+ cn10k_nix_free_extmbuf(extm);
+ extm = NULL;
+ }
+
if (left)
goto again;
@@ -1767,7 +1796,7 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg_vec_noff(struct cn10k_eth_txq *txq,
- struct rte_mbuf *m, uint64_t *cmd,
+ struct rte_mbuf *m, struct rte_mbuf **extm, uint64_t *cmd,
uint64x2_t *cmd0, uint64x2_t *cmd1,
uint64x2_t *cmd2, uint64x2_t *cmd3,
const uint32_t flags)
@@ -1782,7 +1811,7 @@ cn10k_nix_prepare_mseg_vec_noff(struct cn10k_eth_txq *txq,
vst1q_u64(cmd + 2, *cmd1); /* sg */
}
- segdw = cn10k_nix_prepare_mseg(txq, m, cmd, flags);
+ segdw = cn10k_nix_prepare_mseg(txq, m, extm, cmd, flags);
if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
vst1q_u64(cmd + segdw * 2 - 2, *cmd3);
@@ -1892,7 +1921,7 @@ cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
static __rte_always_inline uint8_t
cn10k_nix_prep_lmt_mseg_vector(struct cn10k_eth_txq *txq,
- struct rte_mbuf **mbufs, uint64x2_t *cmd0,
+ struct rte_mbuf **mbufs, struct rte_mbuf **extm, uint64x2_t *cmd0,
uint64x2_t *cmd1, uint64x2_t *cmd2,
uint64x2_t *cmd3, uint8_t *segdw,
uint64_t *lmt_addr, __uint128_t *data128,
@@ -1910,7 +1939,7 @@ cn10k_nix_prep_lmt_mseg_vector(struct cn10k_eth_txq *txq,
lmt_addr += 16;
off = 0;
}
- off += cn10k_nix_prepare_mseg_vec_noff(txq, mbufs[j],
+ off += cn10k_nix_prepare_mseg_vec_noff(txq, mbufs[j], extm,
lmt_addr + off * 2, &cmd0[j], &cmd1[j],
&cmd2[j], &cmd3[j], flags);
}
@@ -2063,14 +2092,14 @@ cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
static __rte_always_inline void
cn10k_nix_xmit_store(struct cn10k_eth_txq *txq,
- struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
+ struct rte_mbuf *mbuf, struct rte_mbuf **extm, uint8_t segdw, uintptr_t laddr,
uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
uint64x2_t cmd3, const uint16_t flags)
{
uint8_t off;
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
- cn10k_nix_prepare_mseg_vec_noff(txq, mbuf, LMT_OFF(laddr, 0, 0),
+ cn10k_nix_prepare_mseg_vec_noff(txq, mbuf, extm, LMT_OFF(laddr, 0, 0),
&cmd0, &cmd1, &cmd2, &cmd3,
flags);
return;
@@ -2154,6 +2183,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
__uint128_t data128;
uint64_t data[2];
} wd;
+ struct rte_mbuf *extm = NULL;
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
handle_tx_completion_pkts(txq, flags & NIX_TX_VWQE_F);
@@ -3003,8 +3033,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
!(flags & NIX_TX_MULTI_SEG_F) &&
!(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
/* Set don't free bit if reference count > 1 */
- cn10k_nix_prefree_seg_vec(tx_pkts, txq, &senddesc01_w0, &senddesc23_w0,
- &senddesc01_w1, &senddesc23_w1);
+ cn10k_nix_prefree_seg_vec(tx_pkts, &extm, txq, &senddesc01_w0,
+ &senddesc23_w0, &senddesc01_w1, &senddesc23_w1);
} else if (!(flags & NIX_TX_MULTI_SEG_F) &&
!(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
/* Move mbufs to iova */
@@ -3076,7 +3106,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
&shift, &wd.data128, &next);
/* Store mbuf0 to LMTLINE/CPT NIXTX area */
- cn10k_nix_xmit_store(txq, tx_pkts[0], segdw[0], next,
+ cn10k_nix_xmit_store(txq, tx_pkts[0], &extm, segdw[0], next,
cmd0[0], cmd1[0], cmd2[0], cmd3[0],
flags);
@@ -3092,7 +3122,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
&shift, &wd.data128, &next);
/* Store mbuf1 to LMTLINE/CPT NIXTX area */
- cn10k_nix_xmit_store(txq, tx_pkts[1], segdw[1], next,
+ cn10k_nix_xmit_store(txq, tx_pkts[1], &extm, segdw[1], next,
cmd0[1], cmd1[1], cmd2[1], cmd3[1],
flags);
@@ -3108,7 +3138,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
&shift, &wd.data128, &next);
/* Store mbuf2 to LMTLINE/CPT NIXTX area */
- cn10k_nix_xmit_store(txq, tx_pkts[2], segdw[2], next,
+ cn10k_nix_xmit_store(txq, tx_pkts[2], &extm, segdw[2], next,
cmd0[2], cmd1[2], cmd2[2], cmd3[2],
flags);
@@ -3124,7 +3154,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
&shift, &wd.data128, &next);
/* Store mbuf3 to LMTLINE/CPT NIXTX area */
- cn10k_nix_xmit_store(txq, tx_pkts[3], segdw[3], next,
+ cn10k_nix_xmit_store(txq, tx_pkts[3], &extm, segdw[3], next,
cmd0[3], cmd1[3], cmd2[3], cmd3[3],
flags);
@@ -3132,7 +3162,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
uint8_t j;
segdw[4] = 8;
- j = cn10k_nix_prep_lmt_mseg_vector(txq, tx_pkts, cmd0, cmd1,
+ j = cn10k_nix_prep_lmt_mseg_vector(txq, tx_pkts, &extm, cmd0, cmd1,
cmd2, cmd3, segdw,
(uint64_t *)
LMT_OFF(laddr, lnum,
@@ -3282,6 +3312,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
}
rte_io_wmb();
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena) {
+ cn10k_nix_free_extmbuf(extm);
+ extm = NULL;
+ }
+
if (left)
goto again;
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index 94acbe64fa..018fae2eb7 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -82,16 +82,28 @@ cn9k_nix_tx_skeleton(struct cn9k_eth_txq *txq, uint64_t *cmd,
}
}
+static __rte_always_inline void
+cn9k_nix_free_extmbuf(struct rte_mbuf *m)
+{
+ struct rte_mbuf *m_next;
+ while (m != NULL) {
+ m_next = m->next;
+ rte_pktmbuf_free_seg(m);
+ m = m_next;
+ }
+}
+
static __rte_always_inline uint64_t
-cn9k_nix_prefree_seg(struct rte_mbuf *m, struct cn9k_eth_txq *txq, struct nix_send_hdr_s *send_hdr,
- uint64_t *aura)
+cn9k_nix_prefree_seg(struct rte_mbuf *m, struct rte_mbuf **extm, struct cn9k_eth_txq *txq,
+ struct nix_send_hdr_s *send_hdr, uint64_t *aura)
{
struct rte_mbuf *prev;
uint32_t sqe_id;
if (RTE_MBUF_HAS_EXTBUF(m)) {
if (unlikely(txq->tx_compl.ena == 0)) {
- rte_pktmbuf_free_seg(m);
+ m->next = *extm;
+ *extm = m;
return 1;
}
if (send_hdr->w0.pnc) {
@@ -115,7 +127,7 @@ cn9k_nix_prefree_seg(struct rte_mbuf *m, struct cn9k_eth_txq *txq, struct nix_se
#if defined(RTE_ARCH_ARM64)
/* Only called for first segments of single segmented mbufs */
static __rte_always_inline void
-cn9k_nix_prefree_seg_vec(struct rte_mbuf **mbufs, struct cn9k_eth_txq *txq,
+cn9k_nix_prefree_seg_vec(struct rte_mbuf **mbufs, struct rte_mbuf **extm, struct cn9k_eth_txq *txq,
uint64x2_t *senddesc01_w0, uint64x2_t *senddesc23_w0,
uint64x2_t *senddesc01_w1, uint64x2_t *senddesc23_w1)
{
@@ -139,7 +151,8 @@ cn9k_nix_prefree_seg_vec(struct rte_mbuf **mbufs, struct cn9k_eth_txq *txq,
w1 = vgetq_lane_u64(*senddesc01_w1, 0);
w1 &= ~0xFFFF000000000000UL;
if (unlikely(!tx_compl_ena)) {
- rte_pktmbuf_free_seg(m0);
+ m0->next = *extm;
+ *extm = m0;
} else {
sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
rte_memory_order_relaxed);
@@ -169,7 +182,8 @@ cn9k_nix_prefree_seg_vec(struct rte_mbuf **mbufs, struct cn9k_eth_txq *txq,
w1 = vgetq_lane_u64(*senddesc01_w1, 1);
w1 &= ~0xFFFF000000000000UL;
if (unlikely(!tx_compl_ena)) {
- rte_pktmbuf_free_seg(m1);
+ m1->next = *extm;
+ *extm = m1;
} else {
sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
rte_memory_order_relaxed);
@@ -199,7 +213,8 @@ cn9k_nix_prefree_seg_vec(struct rte_mbuf **mbufs, struct cn9k_eth_txq *txq,
w1 = vgetq_lane_u64(*senddesc23_w1, 0);
w1 &= ~0xFFFF000000000000UL;
if (unlikely(!tx_compl_ena)) {
- rte_pktmbuf_free_seg(m2);
+ m2->next = *extm;
+ *extm = m2;
} else {
sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
rte_memory_order_relaxed);
@@ -229,7 +244,8 @@ cn9k_nix_prefree_seg_vec(struct rte_mbuf **mbufs, struct cn9k_eth_txq *txq,
w1 = vgetq_lane_u64(*senddesc23_w1, 1);
w1 &= ~0xFFFF000000000000UL;
if (unlikely(!tx_compl_ena)) {
- rte_pktmbuf_free_seg(m3);
+ m3->next = *extm;
+ *extm = m3;
} else {
sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
rte_memory_order_relaxed);
@@ -310,10 +326,9 @@ cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
}
static __rte_always_inline void
-cn9k_nix_xmit_prepare(struct cn9k_eth_txq *txq,
- struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
- const uint64_t lso_tun_fmt, uint8_t mark_flag,
- uint64_t mark_fmt)
+cn9k_nix_xmit_prepare(struct cn9k_eth_txq *txq, struct rte_mbuf *m, struct rte_mbuf **extm,
+ uint64_t *cmd, const uint16_t flags, const uint64_t lso_tun_fmt,
+ uint8_t mark_flag, uint64_t mark_fmt)
{
uint8_t mark_off = 0, mark_vlan = 0, markptr = 0;
struct nix_send_ext_s *send_hdr_ext;
@@ -509,7 +524,7 @@ cn9k_nix_xmit_prepare(struct cn9k_eth_txq *txq,
* DF bit = 0 otherwise
*/
aura = send_hdr->w0.aura;
- send_hdr->w0.df = cn9k_nix_prefree_seg(m, txq, send_hdr, &aura);
+ send_hdr->w0.df = cn9k_nix_prefree_seg(m, extm, txq, send_hdr, &aura);
send_hdr->w0.aura = aura;
/* Ensuring mbuf fields which got updated in
* cnxk_nix_prefree_seg are written before LMTST.
@@ -600,8 +615,8 @@ cn9k_nix_xmit_submit_lmt_release(const rte_iova_t io_addr)
}
static __rte_always_inline uint16_t
-cn9k_nix_prepare_mseg(struct cn9k_eth_txq *txq,
- struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
+cn9k_nix_prepare_mseg(struct cn9k_eth_txq *txq, struct rte_mbuf *m, struct rte_mbuf **extm,
+ uint64_t *cmd, const uint16_t flags)
{
struct nix_send_hdr_s *send_hdr;
uint64_t prefree = 0, aura;
@@ -634,7 +649,7 @@ cn9k_nix_prepare_mseg(struct cn9k_eth_txq *txq,
/* Set invert df if buffer is not to be freed by H/W */
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
aura = send_hdr->w0.aura;
- prefree = (cn9k_nix_prefree_seg(m, txq, send_hdr, &aura) << 55);
+ prefree = (cn9k_nix_prefree_seg(m, extm, txq, send_hdr, &aura) << 55);
send_hdr->w0.aura = aura;
sg_u |= prefree;
rte_io_wmb();
@@ -664,7 +679,7 @@ cn9k_nix_prepare_mseg(struct cn9k_eth_txq *txq,
cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
/* Set invert df if buffer is not to be freed by H/W */
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
- sg_u |= (cn9k_nix_prefree_seg(m, txq, send_hdr, NULL) << (i + 55));
+ sg_u |= (cn9k_nix_prefree_seg(m, extm, txq, send_hdr, NULL) << (i + 55));
/* Commit changes to mbuf */
rte_io_wmb();
}
@@ -748,6 +763,7 @@ cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
const rte_iova_t io_addr = txq->io_addr;
uint64_t lso_tun_fmt = 0, mark_fmt = 0;
void *lmt_addr = txq->lmt_addr;
+ struct rte_mbuf *extm = NULL;
uint8_t mark_flag = 0;
uint16_t i;
@@ -778,13 +794,16 @@ cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
rte_io_wmb();
for (i = 0; i < pkts; i++) {
- cn9k_nix_xmit_prepare(txq, tx_pkts[i], cmd, flags, lso_tun_fmt,
+ cn9k_nix_xmit_prepare(txq, tx_pkts[i], &extm, cmd, flags, lso_tun_fmt,
mark_flag, mark_fmt);
cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags, 4,
flags);
cn9k_nix_xmit_one(cmd, lmt_addr, io_addr, flags);
}
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
+ cn9k_nix_free_extmbuf(extm);
+
/* Reduce the cached count */
txq->fc_cache_pkts -= pkts;
@@ -799,6 +818,7 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
const rte_iova_t io_addr = txq->io_addr;
uint64_t lso_tun_fmt = 0, mark_fmt = 0;
void *lmt_addr = txq->lmt_addr;
+ struct rte_mbuf *extm = NULL;
uint8_t mark_flag = 0;
uint16_t segdw;
uint64_t i;
@@ -830,14 +850,17 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
rte_io_wmb();
for (i = 0; i < pkts; i++) {
- cn9k_nix_xmit_prepare(txq, tx_pkts[i], cmd, flags, lso_tun_fmt,
+ cn9k_nix_xmit_prepare(txq, tx_pkts[i], &extm, cmd, flags, lso_tun_fmt,
mark_flag, mark_fmt);
- segdw = cn9k_nix_prepare_mseg(txq, tx_pkts[i], cmd, flags);
+ segdw = cn9k_nix_prepare_mseg(txq, tx_pkts[i], &extm, cmd, flags);
cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags,
segdw, flags);
cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
}
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
+ cn9k_nix_free_extmbuf(extm);
+
/* Reduce the cached count */
txq->fc_cache_pkts -= pkts;
@@ -885,7 +908,7 @@ cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
static __rte_always_inline uint8_t
cn9k_nix_prepare_mseg_vec_list(struct cn9k_eth_txq *txq,
- struct rte_mbuf *m, uint64_t *cmd,
+ struct rte_mbuf *m, struct rte_mbuf **extm, uint64_t *cmd,
struct nix_send_hdr_s *send_hdr,
union nix_send_sg_s *sg, const uint32_t flags)
{
@@ -910,7 +933,7 @@ cn9k_nix_prepare_mseg_vec_list(struct cn9k_eth_txq *txq,
cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
aura = send_hdr->w0.aura;
- sg_u |= (cn9k_nix_prefree_seg(m, txq, send_hdr, &aura) << 55);
+ sg_u |= (cn9k_nix_prefree_seg(m, extm, txq, send_hdr, &aura) << 55);
send_hdr->w0.aura = aura;
}
/* Mark mempool object as "put" since it is freed by NIX */
@@ -935,7 +958,7 @@ cn9k_nix_prepare_mseg_vec_list(struct cn9k_eth_txq *txq,
cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
/* Set invert df if buffer is not to be freed by H/W */
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
- sg_u |= (cn9k_nix_prefree_seg(m, txq, send_hdr, &aura) << (i + 55));
+ sg_u |= (cn9k_nix_prefree_seg(m, extm, txq, send_hdr, &aura) << (i + 55));
/* Mark mempool object as "put" since it is freed by NIX
*/
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -981,9 +1004,8 @@ cn9k_nix_prepare_mseg_vec_list(struct cn9k_eth_txq *txq,
}
static __rte_always_inline uint8_t
-cn9k_nix_prepare_mseg_vec(struct cn9k_eth_txq *txq,
- struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
- uint64x2_t *cmd1, const uint32_t flags)
+cn9k_nix_prepare_mseg_vec(struct cn9k_eth_txq *txq, struct rte_mbuf *m, struct rte_mbuf **extm,
+ uint64_t *cmd, uint64x2_t *cmd0, uint64x2_t *cmd1, const uint32_t flags)
{
struct nix_send_hdr_s send_hdr;
struct rte_mbuf *cookie;
@@ -998,7 +1020,7 @@ cn9k_nix_prepare_mseg_vec(struct cn9k_eth_txq *txq,
send_hdr.w1.u = vgetq_lane_u64(cmd0[0], 1);
sg.u = vgetq_lane_u64(cmd1[0], 0);
aura = send_hdr.w0.aura;
- sg.u |= (cn9k_nix_prefree_seg(m, txq, &send_hdr, &aura) << 55);
+ sg.u |= (cn9k_nix_prefree_seg(m, extm, txq, &send_hdr, &aura) << 55);
send_hdr.w0.aura = aura;
cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
cmd0[0] = vsetq_lane_u64(send_hdr.w0.u, cmd0[0], 0);
@@ -1021,7 +1043,7 @@ cn9k_nix_prepare_mseg_vec(struct cn9k_eth_txq *txq,
send_hdr.w1.u = vgetq_lane_u64(cmd0[0], 1);
sg.u = vgetq_lane_u64(cmd1[0], 0);
- ret = cn9k_nix_prepare_mseg_vec_list(txq, m, cmd, &send_hdr, &sg, flags);
+ ret = cn9k_nix_prepare_mseg_vec_list(txq, m, extm, cmd, &send_hdr, &sg, flags);
cmd0[0] = vsetq_lane_u64(send_hdr.w0.u, cmd0[0], 0);
cmd0[0] = vsetq_lane_u64(send_hdr.w1.u, cmd0[0], 1);
@@ -1168,6 +1190,7 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64_t *lmt_addr = txq->lmt_addr;
rte_iova_t io_addr = txq->io_addr;
uint64x2_t ltypes01, ltypes23;
+ struct rte_mbuf *extm = NULL;
uint64x2_t xtmp128, ytmp128;
uint64x2_t xmask01, xmask23;
uint64_t lmt_status, i;
@@ -1933,8 +1956,8 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
!(flags & NIX_TX_MULTI_SEG_F)) {
/* Set don't free bit if reference count > 1 */
- cn9k_nix_prefree_seg_vec(tx_pkts, txq, &senddesc01_w0, &senddesc23_w0,
- &senddesc01_w1, &senddesc23_w1);
+ cn9k_nix_prefree_seg_vec(tx_pkts, &extm, txq, &senddesc01_w0,
+ &senddesc23_w0, &senddesc01_w1, &senddesc23_w1);
/* Ensuring mbuf fields which got updated in
* cnxk_nix_prefree_seg are written before LMTST.
*/
@@ -1995,7 +2018,7 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
/* Build mseg list for each packet individually. */
for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
segdw[j] = cn9k_nix_prepare_mseg_vec(txq,
- tx_pkts[j],
+ tx_pkts[j], &extm,
seg_list[j], &cmd0[j],
&cmd1[j], flags);
segdw[4] = 8;
@@ -2070,6 +2093,9 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
}
+ if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
+ cn9k_nix_free_extmbuf(extm);
+
if (unlikely(pkts_left)) {
if (flags & NIX_TX_MULTI_SEG_F)
pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
--
2.25.1