From: Jerin Jacob <jerinjacobk@gmail.com>
To: Pavan Nikhilesh <pbhagavatula@marvell.com>,
Ferruh Yigit <ferruh.yigit@intel.com>
Cc: Jerin Jacob <jerinj@marvell.com>,
Nithin Dabilpuram <ndabilpuram@marvell.com>,
Kiran Kumar K <kirankumark@marvell.com>,
Sunil Kumar Kori <skori@marvell.com>,
Satha Rao <skoteshwar@marvell.com>,
Ankur Dwivedi <adwivedi@marvell.com>,
Anoob Joseph <anoobj@marvell.com>,
Tejasree Kondoj <ktejasree@marvell.com>,
Shijith Thotton <sthotton@marvell.com>, dpdk-dev <dev@dpdk.org>
Subject: Re: [PATCH v4] net/cnxk: avoid command copy from Tx queue
Date: Fri, 11 Feb 2022 15:57:11 +0530 [thread overview]
Message-ID: <CALBAE1OQd2L4WpOZXCCnbPtX_i8apcTwEfLH=BatnYJtGvsNVw@mail.gmail.com> (raw)
In-Reply-To: <20220210131526.1878-1-pbhagavatula@marvell.com>
On Thu, Feb 10, 2022 at 6:46 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Tx command is prepared based on the enabled offloads and stored in
> the Tx queue structure at the tx_queue_setup phase.
> In fastpath the command is copied from Tx queue to LMT line for
> all the packets.
> Since the command contents are mostly constant, we can move the
> command preparation to the fastpath and avoid accessing Tx queue
> memory.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Jerin Jacob <jerinj@marvell.com>
Applied to dpdk-next-net-mrvl/for-next-net. Thanks
> ---
> v4 Changes:
> - Further refactor large functions.
> v3 Changes:
> - Rebase.
> - Split patches.
> - Refactoring large function.
> v2 Changes:
> - Rebase.
> - Fix incorrect use of RoC API.
>
> drivers/common/cnxk/roc_io.h | 33 ++++-
> drivers/common/cnxk/roc_io_generic.h | 15 ++
> drivers/crypto/cnxk/cn9k_cryptodev_ops.c | 2 +-
> drivers/crypto/cnxk/cn9k_ipsec.c | 2 +-
> drivers/event/cnxk/cn10k_eventdev.c | 26 +++-
> drivers/event/cnxk/cn10k_worker.h | 89 ++++++------
> drivers/event/cnxk/cn9k_eventdev.c | 33 +++--
> drivers/event/cnxk/cn9k_worker.h | 64 ++++----
> drivers/event/cnxk/cnxk_eventdev.h | 13 +-
> drivers/event/cnxk/cnxk_eventdev_adptr.c | 178 +++++++++++++++++++++--
> drivers/net/cnxk/cn10k_ethdev.c | 24 +--
> drivers/net/cnxk/cn10k_ethdev.h | 3 +-
> drivers/net/cnxk/cn10k_tx.h | 167 ++++++++++-----------
> drivers/net/cnxk/cn9k_ethdev.c | 36 +----
> drivers/net/cnxk/cn9k_ethdev.h | 3 +-
> drivers/net/cnxk/cn9k_tx.h | 135 +++++++++++------
> 16 files changed, 516 insertions(+), 307 deletions(-)
>
> diff --git a/drivers/common/cnxk/roc_io.h b/drivers/common/cnxk/roc_io.h
> index 4f15503c29..62e98d9d00 100644
> --- a/drivers/common/cnxk/roc_io.h
> +++ b/drivers/common/cnxk/roc_io.h
> @@ -164,13 +164,36 @@ roc_lmt_mov(void *out, const void *in, const uint32_t lmtext)
> dst128[1] = src128[1];
> /* lmtext receives following value:
> * 1: NIX_SUBDC_EXT needed i.e. tx vlan case
> - * 2: NIX_SUBDC_EXT + NIX_SUBDC_MEM i.e. tstamp case
> */
> - if (lmtext) {
> + if (lmtext)
> + dst128[2] = src128[2];
> +}
> +
> +static __plt_always_inline void
> +roc_lmt_mov64(void *out, const void *in)
> +{
> + volatile const __uint128_t *src128 = (const __uint128_t *)in;
> + volatile __uint128_t *dst128 = (__uint128_t *)out;
> +
> + dst128[0] = src128[0];
> + dst128[1] = src128[1];
> + dst128[2] = src128[2];
> + dst128[3] = src128[3];
> +}
> +
> +static __plt_always_inline void
> +roc_lmt_mov_nv(void *out, const void *in, const uint32_t lmtext)
> +{
> + const __uint128_t *src128 = (const __uint128_t *)in;
> + __uint128_t *dst128 = (__uint128_t *)out;
> +
> + dst128[0] = src128[0];
> + dst128[1] = src128[1];
> + /* lmtext receives following value:
> + * 1: NIX_SUBDC_EXT needed i.e. tx vlan case
> + */
> + if (lmtext)
> dst128[2] = src128[2];
> - if (lmtext > 1)
> - dst128[3] = src128[3];
> - }
> }
>
> static __plt_always_inline void
> diff --git a/drivers/common/cnxk/roc_io_generic.h b/drivers/common/cnxk/roc_io_generic.h
> index 5f90835c09..42764455cc 100644
> --- a/drivers/common/cnxk/roc_io_generic.h
> +++ b/drivers/common/cnxk/roc_io_generic.h
> @@ -106,6 +106,21 @@ roc_lmt_mov(void *out, const void *in, const uint32_t lmtext)
> memset(out, 0, sizeof(__uint128_t) * (lmtext ? lmtext > 1 ? 4 : 3 : 2));
> }
>
> +static __plt_always_inline void
> +roc_lmt_mov64(void *out, const void *in)
> +{
> + PLT_SET_USED(out);
> + PLT_SET_USED(in);
> +}
> +
> +static __plt_always_inline void
> +roc_lmt_mov_nv(void *out, const void *in, const uint32_t lmtext)
> +{
> + PLT_SET_USED(in);
> + PLT_SET_USED(lmtext);
> + memset(out, 0, sizeof(__uint128_t) * (lmtext ? lmtext > 1 ? 4 : 3 : 2));
> +}
> +
> static __plt_always_inline void
> roc_lmt_mov_seg(void *out, const void *in, const uint16_t segdw)
> {
> diff --git a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
> index ac1953b66d..ddba9d5dd0 100644
> --- a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
> +++ b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c
> @@ -161,7 +161,7 @@ cn9k_cpt_inst_submit(struct cpt_inst_s *inst, uint64_t lmtline,
>
> do {
> /* Copy CPT command to LMTLINE */
> - roc_lmt_mov((void *)lmtline, inst, 2);
> + roc_lmt_mov64((void *)lmtline, inst);
>
> /*
> * Make sure compiler does not reorder memcpy and ldeor.
> diff --git a/drivers/crypto/cnxk/cn9k_ipsec.c b/drivers/crypto/cnxk/cn9k_ipsec.c
> index 9f876f75f2..672b65a5d2 100644
> --- a/drivers/crypto/cnxk/cn9k_ipsec.c
> +++ b/drivers/crypto/cnxk/cn9k_ipsec.c
> @@ -53,7 +53,7 @@ cn9k_cpt_enq_sa_write(struct cn9k_ipsec_sa *sa, struct cnxk_cpt_qp *qp,
>
> do {
> /* Copy CPT command to LMTLINE */
> - roc_lmt_mov((void *)lmtline, &inst, 2);
> + roc_lmt_mov64((void *)lmtline, &inst);
> lmt_status = roc_lmt_submit_ldeor(io_addr);
> } while (lmt_status == 0);
>
> diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
> index 7b7ce44c74..97a88feb13 100644
> --- a/drivers/event/cnxk/cn10k_eventdev.c
> +++ b/drivers/event/cnxk/cn10k_eventdev.c
> @@ -50,7 +50,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
> /* First cache line is reserved for cookie */
> ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
> ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
> - ws->tx_base = ws->base;
> ws->hws_id = port_id;
> ws->swtag_req = 0;
> ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
> @@ -259,15 +258,13 @@ cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
> ws_cookie,
> sizeof(struct cnxk_sso_hws_cookie) +
> sizeof(struct cn10k_sso_hws) +
> - (sizeof(uint64_t) * (dev->max_port_id + 1) *
> - RTE_MAX_QUEUES_PER_PORT),
> + dev->tx_adptr_data_sz,
> RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
> if (ws_cookie == NULL)
> return -ENOMEM;
> ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
> memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
> - sizeof(uint64_t) * (dev->max_port_id + 1) *
> - RTE_MAX_QUEUES_PER_PORT);
> + dev->tx_adptr_data_sz);
> event_dev->data->ports[i] = ws;
> }
>
> @@ -721,16 +718,35 @@ cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
> const struct rte_eth_dev *eth_dev,
> int32_t tx_queue_id)
> {
> + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
> + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
> + uint64_t tx_offloads;
> int rc;
>
> RTE_SET_USED(id);
> rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
> if (rc < 0)
> return rc;
> +
> + /* Can't enable tstamp if all the ports don't have it enabled. */
> + tx_offloads = cnxk_eth_dev->tx_offload_flags;
> + if (dev->tx_adptr_configured) {
> + uint8_t tstmp_req = !!(tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
> + uint8_t tstmp_ena =
> + !!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
> +
> + if (tstmp_ena && !tstmp_req)
> + dev->tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
> + else if (!tstmp_ena && tstmp_req)
> + tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
> + }
> +
> + dev->tx_offloads |= tx_offloads;
> rc = cn10k_sso_updt_tx_adptr_data(event_dev);
> if (rc < 0)
> return rc;
> cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
> + dev->tx_adptr_configured = 1;
>
> return 0;
> }
> diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
> index 4019c13bd2..ff08b2d974 100644
> --- a/drivers/event/cnxk/cn10k_worker.h
> +++ b/drivers/event/cnxk/cn10k_worker.h
> @@ -455,18 +455,18 @@ NIX_RX_FASTPATH_MODES
> }
>
> static __rte_always_inline struct cn10k_eth_txq *
> -cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
> - const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
> +cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t *txq_data)
> {
> - return (struct cn10k_eth_txq *)
> - txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
> + return (struct cn10k_eth_txq
> + *)(txq_data[(txq_data[m->port] >> 48) +
> + rte_event_eth_tx_adapter_txq_get(m)] &
> + (BIT_ULL(48) - 1));
> }
>
> static __rte_always_inline void
> -cn10k_sso_tx_one(struct rte_mbuf *m, uint64_t *cmd, uint16_t lmt_id,
> - uintptr_t lmt_addr, uint8_t sched_type, uintptr_t base,
> - const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
> - const uint32_t flags)
> +cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
> + uint16_t lmt_id, uintptr_t lmt_addr, uint8_t sched_type,
> + const uint64_t *txq_data, const uint32_t flags)
> {
> uint8_t lnum = 0, loff = 0, shft = 0;
> struct cn10k_eth_txq *txq;
> @@ -476,7 +476,7 @@ cn10k_sso_tx_one(struct rte_mbuf *m, uint64_t *cmd, uint16_t lmt_id,
> bool sec;
>
> txq = cn10k_sso_hws_xtract_meta(m, txq_data);
> - cn10k_nix_tx_skeleton(txq, cmd, flags);
> + cn10k_nix_tx_skeleton(txq, cmd, flags, 0);
> /* Perform header writes before barrier
> * for TSO
> */
> @@ -501,23 +501,23 @@ cn10k_sso_tx_one(struct rte_mbuf *m, uint64_t *cmd, uint16_t lmt_id,
> else
> segdw = cn10k_nix_tx_ext_subs(flags) + 2;
>
> + cn10k_nix_xmit_prepare_tstamp(txq, laddr, m->ol_flags, segdw, flags);
> if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
> pa = txq->cpt_io_addr | 3 << 4;
> else
> pa = txq->io_addr | ((segdw - 1) << 4);
>
> if (!sched_type)
> - roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> + roc_sso_hws_head_wait(ws->base + SSOW_LF_GWS_TAG);
>
> roc_lmt_submit_steorl(lmt_id, pa);
> }
>
> static __rte_always_inline void
> -cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
> - uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr,
> - uint8_t sched_type, uintptr_t base,
> - const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
> - const uint32_t flags)
> +cn10k_sso_vwqe_split_tx(struct cn10k_sso_hws *ws, struct rte_mbuf **mbufs,
> + uint16_t nb_mbufs, uint64_t *cmd, uint16_t lmt_id,
> + uintptr_t lmt_addr, uint8_t sched_type,
> + const uint64_t *txq_data, const uint32_t flags)
> {
> uint16_t port[4], queue[4];
> uint16_t i, j, pkts, scalar;
> @@ -540,14 +540,16 @@ cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
> if (((port[0] ^ port[1]) & (port[2] ^ port[3])) ||
> ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) {
> for (j = 0; j < 4; j++)
> - cn10k_sso_tx_one(mbufs[i + j], cmd, lmt_id,
> - lmt_addr, sched_type, base,
> - txq_data, flags);
> + cn10k_sso_tx_one(ws, mbufs[i + j], cmd, lmt_id,
> + lmt_addr, sched_type, txq_data,
> + flags);
> } else {
> - txq = (struct cn10k_eth_txq *)
> - txq_data[port[0]][queue[0]];
> - cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd,
> - base + SSOW_LF_GWS_TAG,
> + txq = (struct cn10k_eth_txq
> + *)(txq_data[(txq_data[port[0]] >> 48) +
> + queue[0]] &
> + (BIT_ULL(48) - 1));
> + cn10k_nix_xmit_pkts_vector(txq, (uint64_t *)ws,
> + &mbufs[i], 4, cmd,
> flags | NIX_TX_VWQE_F);
> }
> }
> @@ -555,15 +557,14 @@ cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
> mbufs += i;
>
> for (i = 0; i < scalar; i++) {
> - cn10k_sso_tx_one(mbufs[i], cmd, lmt_id, lmt_addr, sched_type,
> - base, txq_data, flags);
> + cn10k_sso_tx_one(ws, mbufs[i], cmd, lmt_id, lmt_addr,
> + sched_type, txq_data, flags);
> }
> }
>
> static __rte_always_inline uint16_t
> cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
> - uint64_t *cmd,
> - const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
> + uint64_t *cmd, const uint64_t *txq_data,
> const uint32_t flags)
> {
> struct cn10k_eth_txq *txq;
> @@ -580,17 +581,19 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
> uint64_t meta = *(uint64_t *)ev->vec;
>
> if (meta & BIT(31)) {
> - txq = (struct cn10k_eth_txq *)
> - txq_data[meta >> 32][meta >> 48];
> -
> - cn10k_nix_xmit_pkts_vector(
> - txq, mbufs, meta & 0xFFFF, cmd,
> - ws->tx_base + SSOW_LF_GWS_TAG,
> - flags | NIX_TX_VWQE_F);
> + txq = (struct cn10k_eth_txq
> + *)(txq_data[(txq_data[meta >> 32] >>
> + 48) +
> + (meta >> 48)] &
> + (BIT_ULL(48) - 1));
> +
> + cn10k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, mbufs,
> + meta & 0xFFFF, cmd,
> + flags | NIX_TX_VWQE_F);
> } else {
> cn10k_sso_vwqe_split_tx(
> - mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr,
> - ev->sched_type, ws->tx_base, txq_data, flags);
> + ws, mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr,
> + ev->sched_type, txq_data, flags);
> }
> rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
> return (meta & 0xFFFF);
> @@ -598,16 +601,16 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
>
> m = ev->mbuf;
> ref_cnt = m->refcnt;
> - cn10k_sso_tx_one(m, cmd, lmt_id, lmt_addr, ev->sched_type, ws->tx_base,
> - txq_data, flags);
> + cn10k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data,
> + flags);
>
> if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
> if (ref_cnt > 1)
> return 1;
> }
>
> - cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
> - ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
> + cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_TAG,
> + ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
> return 1;
> }
>
> @@ -628,9 +631,7 @@ NIX_TX_FASTPATH_MODES
> uint64_t cmd[sz]; \
> RTE_SET_USED(nb_events); \
> return cn10k_sso_hws_event_tx( \
> - ws, &ev[0], cmd, \
> - (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
> - ws->tx_adptr_data, \
> + ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, \
> flags); \
> }
>
> @@ -642,9 +643,7 @@ NIX_TX_FASTPATH_MODES
> struct cn10k_sso_hws *ws = port; \
> RTE_SET_USED(nb_events); \
> return cn10k_sso_hws_event_tx( \
> - ws, &ev[0], cmd, \
> - (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
> - ws->tx_adptr_data, \
> + ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data, \
> (flags) | NIX_TX_MULTI_SEG_F); \
> }
>
> diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
> index 4611936b7f..f8652d4fbc 100644
> --- a/drivers/event/cnxk/cn9k_eventdev.c
> +++ b/drivers/event/cnxk/cn9k_eventdev.c
> @@ -259,17 +259,14 @@ cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
> ws_cookie,
> sizeof(struct cnxk_sso_hws_cookie) +
> sizeof(struct cn9k_sso_hws_dual) +
> - (sizeof(uint64_t) *
> - (dev->max_port_id + 1) *
> - RTE_MAX_QUEUES_PER_PORT),
> + dev->tx_adptr_data_sz,
> RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
> if (ws_cookie == NULL)
> return -ENOMEM;
> dws = RTE_PTR_ADD(ws_cookie,
> sizeof(struct cnxk_sso_hws_cookie));
> memcpy(&dws->tx_adptr_data, dev->tx_adptr_data,
> - sizeof(uint64_t) * (dev->max_port_id + 1) *
> - RTE_MAX_QUEUES_PER_PORT);
> + dev->tx_adptr_data_sz);
> event_dev->data->ports[i] = dws;
> } else {
> struct cn9k_sso_hws *ws = event_dev->data->ports[i];
> @@ -280,17 +277,14 @@ cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
> ws_cookie,
> sizeof(struct cnxk_sso_hws_cookie) +
> sizeof(struct cn9k_sso_hws_dual) +
> - (sizeof(uint64_t) *
> - (dev->max_port_id + 1) *
> - RTE_MAX_QUEUES_PER_PORT),
> + dev->tx_adptr_data_sz,
> RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
> if (ws_cookie == NULL)
> return -ENOMEM;
> ws = RTE_PTR_ADD(ws_cookie,
> sizeof(struct cnxk_sso_hws_cookie));
> memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
> - sizeof(uint64_t) * (dev->max_port_id + 1) *
> - RTE_MAX_QUEUES_PER_PORT);
> + dev->tx_adptr_data_sz);
> event_dev->data->ports[i] = ws;
> }
> }
> @@ -987,17 +981,36 @@ cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
> const struct rte_eth_dev *eth_dev,
> int32_t tx_queue_id)
> {
> + struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
> + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
> + uint64_t tx_offloads;
> int rc;
>
> RTE_SET_USED(id);
> rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
> if (rc < 0)
> return rc;
> +
> + /* Can't enable tstamp if all the ports don't have it enabled. */
> + tx_offloads = cnxk_eth_dev->tx_offload_flags;
> + if (dev->tx_adptr_configured) {
> + uint8_t tstmp_req = !!(tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
> + uint8_t tstmp_ena =
> + !!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
> +
> + if (tstmp_ena && !tstmp_req)
> + dev->tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
> + else if (!tstmp_ena && tstmp_req)
> + tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
> + }
> +
> + dev->tx_offloads |= tx_offloads;
> cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, true);
> rc = cn9k_sso_updt_tx_adptr_data(event_dev);
> if (rc < 0)
> return rc;
> cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
> + dev->tx_adptr_configured = 1;
>
> return 0;
> }
> diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
> index c99e459c1b..303b04c215 100644
> --- a/drivers/event/cnxk/cn9k_worker.h
> +++ b/drivers/event/cnxk/cn9k_worker.h
> @@ -599,20 +599,13 @@ cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq)
> ;
> }
>
> -static __rte_always_inline const struct cn9k_eth_txq *
> -cn9k_sso_hws_xtract_meta(struct rte_mbuf *m,
> - const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
> +static __rte_always_inline struct cn9k_eth_txq *
> +cn9k_sso_hws_xtract_meta(struct rte_mbuf *m, uint64_t *txq_data)
> {
> - return (const struct cn9k_eth_txq *)
> - txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
> -}
> -
> -static __rte_always_inline void
> -cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m,
> - uint64_t *cmd, const uint32_t flags)
> -{
> - roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags));
> - cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt);
> + return (struct cn9k_eth_txq
> + *)(txq_data[(txq_data[m->port] >> 48) +
> + rte_event_eth_tx_adapter_txq_get(m)] &
> + (BIT_ULL(48) - 1));
> }
>
> #if defined(RTE_ARCH_ARM64)
> @@ -669,7 +662,7 @@ cn9k_sso_hws_xmit_sec_one(const struct cn9k_eth_txq *txq, uint64_t base,
> nixtx += BIT_ULL(7);
> nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
>
> - roc_lmt_mov((void *)(nixtx + 16), cmd, cn9k_nix_tx_ext_subs(flags));
> + roc_lmt_mov_nv((void *)(nixtx + 16), cmd, cn9k_nix_tx_ext_subs(flags));
>
> /* Load opcode and cptr already prepared at pkt metadata set */
> pkt_len -= l2_len;
> @@ -756,12 +749,11 @@ cn9k_sso_hws_xmit_sec_one(const struct cn9k_eth_txq *txq, uint64_t base,
>
> static __rte_always_inline uint16_t
> cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
> - const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
> - const uint32_t flags)
> + uint64_t *txq_data, const uint32_t flags)
> {
> struct rte_mbuf *m = ev->mbuf;
> - const struct cn9k_eth_txq *txq;
> uint16_t ref_cnt = m->refcnt;
> + struct cn9k_eth_txq *txq;
>
> /* Perform header writes before barrier for TSO */
> cn9k_nix_xmit_prepare_tso(m, flags);
> @@ -774,7 +766,8 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
> !(flags & NIX_TX_OFFLOAD_SECURITY_F))
> rte_io_wmb();
> txq = cn9k_sso_hws_xtract_meta(m, txq_data);
> - cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags);
> + cn9k_nix_tx_skeleton(txq, cmd, flags, 0);
> + cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt);
>
> if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
> uint64_t ol_flags = m->ol_flags;
> @@ -796,6 +789,8 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
>
> if (flags & NIX_TX_MULTI_SEG_F) {
> const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
> + cn9k_nix_xmit_prepare_tstamp(txq, cmd, m->ol_flags, segdw,
> + flags);
> if (!CNXK_TT_FROM_EVENT(ev->event)) {
> cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
> roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> @@ -808,6 +803,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
> segdw);
> }
> } else {
> + cn9k_nix_xmit_prepare_tstamp(txq, cmd, m->ol_flags, 4, flags);
> if (!CNXK_TT_FROM_EVENT(ev->event)) {
> cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
> roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> @@ -853,11 +849,9 @@ NIX_TX_FASTPATH_MODES
> struct cn9k_sso_hws *ws = port; \
> uint64_t cmd[sz]; \
> RTE_SET_USED(nb_events); \
> - return cn9k_sso_hws_event_tx( \
> - ws->base, &ev[0], cmd, \
> - (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
> - ws->tx_adptr_data, \
> - flags); \
> + return cn9k_sso_hws_event_tx(ws->base, &ev[0], cmd, \
> + (uint64_t *)ws->tx_adptr_data, \
> + flags); \
> }
>
> #define SSO_TX_SEG(fn, sz, flags) \
> @@ -867,11 +861,9 @@ NIX_TX_FASTPATH_MODES
> uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
> struct cn9k_sso_hws *ws = port; \
> RTE_SET_USED(nb_events); \
> - return cn9k_sso_hws_event_tx( \
> - ws->base, &ev[0], cmd, \
> - (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
> - ws->tx_adptr_data, \
> - (flags) | NIX_TX_MULTI_SEG_F); \
> + return cn9k_sso_hws_event_tx(ws->base, &ev[0], cmd, \
> + (uint64_t *)ws->tx_adptr_data, \
> + (flags) | NIX_TX_MULTI_SEG_F); \
> }
>
> #define SSO_DUAL_TX(fn, sz, flags) \
> @@ -881,11 +873,9 @@ NIX_TX_FASTPATH_MODES
> struct cn9k_sso_hws_dual *ws = port; \
> uint64_t cmd[sz]; \
> RTE_SET_USED(nb_events); \
> - return cn9k_sso_hws_event_tx( \
> - ws->base[!ws->vws], &ev[0], cmd, \
> - (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
> - ws->tx_adptr_data, \
> - flags); \
> + return cn9k_sso_hws_event_tx(ws->base[!ws->vws], &ev[0], cmd, \
> + (uint64_t *)ws->tx_adptr_data, \
> + flags); \
> }
>
> #define SSO_DUAL_TX_SEG(fn, sz, flags) \
> @@ -895,11 +885,9 @@ NIX_TX_FASTPATH_MODES
> uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
> struct cn9k_sso_hws_dual *ws = port; \
> RTE_SET_USED(nb_events); \
> - return cn9k_sso_hws_event_tx( \
> - ws->base[!ws->vws], &ev[0], cmd, \
> - (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
> - ws->tx_adptr_data, \
> - (flags) | NIX_TX_MULTI_SEG_F); \
> + return cn9k_sso_hws_event_tx(ws->base[!ws->vws], &ev[0], cmd, \
> + (uint64_t *)ws->tx_adptr_data, \
> + (flags) | NIX_TX_MULTI_SEG_F); \
> }
>
> #endif
> diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
> index 4652b58a84..b26df58588 100644
> --- a/drivers/event/cnxk/cnxk_eventdev.h
> +++ b/drivers/event/cnxk/cnxk_eventdev.h
> @@ -99,7 +99,10 @@ struct cnxk_sso_evdev {
> uint16_t rx_adptr_pool_cnt;
> uint64_t *rx_adptr_pools;
> uint64_t *tx_adptr_data;
> + size_t tx_adptr_data_sz;
> uint16_t max_port_id;
> + uint16_t max_queue_id[RTE_MAX_ETHPORTS];
> + uint8_t tx_adptr_configured;
> uint16_t tim_adptr_ring_cnt;
> uint16_t *timer_adptr_rings;
> uint64_t *timer_adptr_sz;
> @@ -131,8 +134,8 @@ struct cn10k_sso_hws {
> uint64_t *fc_mem;
> uintptr_t grp_base;
> /* Tx Fastpath data */
> - uint64_t tx_base __rte_cache_aligned;
> - uintptr_t lmt_base;
> + uintptr_t lmt_base __rte_cache_aligned;
> + uint64_t lso_tun_fmt;
> uint8_t tx_adptr_data[];
> } __rte_cache_aligned;
>
> @@ -149,7 +152,8 @@ struct cn9k_sso_hws {
> uint64_t *fc_mem;
> uintptr_t grp_base;
> /* Tx Fastpath data */
> - uint8_t tx_adptr_data[] __rte_cache_aligned;
> + uint64_t lso_tun_fmt __rte_cache_aligned;
> + uint8_t tx_adptr_data[];
> } __rte_cache_aligned;
>
> struct cn9k_sso_hws_dual {
> @@ -165,7 +169,8 @@ struct cn9k_sso_hws_dual {
> uint64_t *fc_mem;
> uintptr_t grp_base;
> /* Tx Fastpath data */
> - uint8_t tx_adptr_data[] __rte_cache_aligned;
> + uint64_t lso_tun_fmt __rte_cache_aligned;
> + uint8_t tx_adptr_data[];
> } __rte_cache_aligned;
>
> struct cnxk_sso_hws_cookie {
> diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
> index fdcd68ca63..5ebd3340e7 100644
> --- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
> +++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
> @@ -339,30 +339,179 @@ cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs)
> sq->aura_handle, RTE_MIN(nb_sqb_bufs, sq->aura_sqb_bufs));
> }
>
> +static void
> +cnxk_sso_tx_queue_data_init(struct cnxk_sso_evdev *dev, uint64_t *txq_data,
> + uint16_t eth_port_id, uint16_t tx_queue_id)
> +{
> + uint64_t offset = 0;
> + int i;
> +
> + dev->max_queue_id[0] = RTE_MAX(dev->max_queue_id[0], eth_port_id);
> + for (i = 1; i < eth_port_id; i++) {
> + offset += (dev->max_queue_id[i - 1] + 1);
> + txq_data[i] |= offset << 48;
> + }
> + dev->max_port_id = RTE_MAX(dev->max_port_id, eth_port_id);
> + dev->max_queue_id[eth_port_id] =
> + RTE_MAX(dev->max_queue_id[eth_port_id], tx_queue_id);
> +}
> +
> +static void
> +cnxk_sso_tx_queue_data_cpy(struct cnxk_sso_evdev *dev, uint64_t *txq_data,
> + uint64_t *otxq_data, uint16_t eth_port_id)
> +{
> + uint64_t offset = 0;
> + int i, j;
> +
> + for (i = 1; i < eth_port_id; i++) {
> + offset += (dev->max_queue_id[i - 1] + 1);
> + txq_data[i] |= offset << 48;
> + for (j = 0;
> + (i < dev->max_port_id) && (j < dev->max_queue_id[i] + 1);
> + j++)
> + txq_data[offset + j] =
> + otxq_data[(otxq_data[i] >> 48) + j];
> + }
> +}
> +
> +static void
> +cnxk_sso_tx_queue_data_cpy_max(struct cnxk_sso_evdev *dev, uint64_t *txq_data,
> + uint64_t *otxq_data, uint16_t eth_port_id,
> + uint16_t max_port_id, uint16_t max_queue_id)
> +{
> + uint64_t offset = 0;
> + int i, j;
> +
> + for (i = 1; i < max_port_id + 1; i++) {
> + offset += (dev->max_queue_id[i - 1] + 1);
> + txq_data[i] |= offset << 48;
> + for (j = 0; j < dev->max_queue_id[i] + 1; j++) {
> + if (i == eth_port_id && j > max_queue_id)
> + continue;
> + txq_data[offset + j] =
> + otxq_data[(otxq_data[i] >> 48) + j];
> + }
> + }
> +}
> +
> +static void
> +cnxk_sso_tx_queue_data_rewrite(struct cnxk_sso_evdev *dev, uint64_t *txq_data,
> + uint16_t eth_port_id, uint16_t tx_queue_id,
> + uint64_t *otxq_data, uint16_t max_port_id,
> + uint16_t max_queue_id)
> +{
> + int i;
> +
> + for (i = 0; i < dev->max_queue_id[0] + 1; i++)
> + txq_data[i] |= (otxq_data[i] & ~((BIT_ULL(16) - 1) << 48));
> +
> + if (eth_port_id > max_port_id) {
> + dev->max_queue_id[0] =
> + RTE_MAX(dev->max_queue_id[0], eth_port_id);
> + dev->max_port_id = RTE_MAX(dev->max_port_id, eth_port_id);
> +
> + cnxk_sso_tx_queue_data_cpy(dev, txq_data, otxq_data,
> + eth_port_id);
> + dev->max_queue_id[eth_port_id] =
> + RTE_MAX(dev->max_queue_id[eth_port_id], tx_queue_id);
> + } else if (tx_queue_id > max_queue_id) {
> + dev->max_queue_id[eth_port_id] =
> + RTE_MAX(dev->max_queue_id[eth_port_id], tx_queue_id);
> + dev->max_port_id = RTE_MAX(max_port_id, eth_port_id);
> + cnxk_sso_tx_queue_data_cpy_max(dev, txq_data, otxq_data,
> + eth_port_id, max_port_id,
> + max_queue_id);
> + }
> +}
> +
> +static void
> +cnxk_sso_tx_queue_data_sz(struct cnxk_sso_evdev *dev, uint16_t eth_port_id,
> + uint16_t tx_queue_id, uint16_t max_port_id,
> + uint16_t max_queue_id, uint64_t *r, size_t *sz)
> +{
> + uint64_t row = 0;
> + size_t size = 0;
> + int i;
> +
> + if (dev->tx_adptr_data == NULL) {
> + size = (eth_port_id + 1);
> + size += (eth_port_id + tx_queue_id);
> + row = 2 * eth_port_id;
> + *r = row;
> + *sz = size;
> + return;
> + }
> +
> + if (eth_port_id > max_port_id) {
> + size = (RTE_MAX(eth_port_id, dev->max_queue_id[0]) + 1);
> + for (i = 1; i < eth_port_id; i++)
> + size += (dev->max_queue_id[i] + 1);
> + row = size;
> + size += (tx_queue_id + 1);
> + } else if (tx_queue_id > max_queue_id) {
> + size = !eth_port_id ?
> + tx_queue_id + 1 :
> + RTE_MAX(max_port_id, dev->max_queue_id[0]) + 1;
> + for (i = 1; i < max_port_id + 1; i++) {
> + if (i == eth_port_id) {
> + row = size;
> + size += tx_queue_id + 1;
> + } else {
> + size += dev->max_queue_id[i] + 1;
> + }
> + }
> + }
> + *r = row;
> + *sz = size;
> +}
> +
> static int
> cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev,
> uint16_t eth_port_id, uint16_t tx_queue_id,
> void *txq)
> {
> struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
> + uint16_t max_queue_id = dev->max_queue_id[eth_port_id];
> uint16_t max_port_id = dev->max_port_id;
> - uint64_t *txq_data = dev->tx_adptr_data;
> -
> - if (txq_data == NULL || eth_port_id > max_port_id) {
> - max_port_id = RTE_MAX(max_port_id, eth_port_id);
> - txq_data = rte_realloc_socket(
> - txq_data,
> - (sizeof(uint64_t) * (max_port_id + 1) *
> - RTE_MAX_QUEUES_PER_PORT),
> - RTE_CACHE_LINE_SIZE, event_dev->data->socket_id);
> + uint64_t *txq_data = NULL;
> + uint64_t row = 0;
> + size_t size = 0;
> +
> + if (((uint64_t)txq) & 0xFFFF000000000000)
> + return -EINVAL;
> +
> + cnxk_sso_tx_queue_data_sz(dev, eth_port_id, tx_queue_id, max_port_id,
> + max_queue_id, &row, &size);
> +
> + size *= sizeof(uint64_t);
> +
> + if (size) {
> + uint64_t *otxq_data = dev->tx_adptr_data;
> +
> + txq_data = malloc(size);
> if (txq_data == NULL)
> return -ENOMEM;
> + memset(txq_data, 0, size);
> + txq_data[eth_port_id] = ((uint64_t)row) << 48;
> + txq_data[row + tx_queue_id] = (uint64_t)txq;
> +
> + if (otxq_data != NULL)
> + cnxk_sso_tx_queue_data_rewrite(
> + dev, txq_data, eth_port_id, tx_queue_id,
> + otxq_data, max_port_id, max_queue_id);
> + else
> + cnxk_sso_tx_queue_data_init(dev, txq_data, eth_port_id,
> + tx_queue_id);
> + dev->tx_adptr_data_sz = size;
> + free(otxq_data);
> + dev->tx_adptr_data = txq_data;
> + } else {
> + txq_data = dev->tx_adptr_data;
> + row = txq_data[eth_port_id] >> 48;
> + txq_data[row + tx_queue_id] &= ~(BIT_ULL(48) - 1);
> + txq_data[row + tx_queue_id] |= (uint64_t)txq;
> }
>
> - ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT])
> - txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq;
> - dev->max_port_id = max_port_id;
> - dev->tx_adptr_data = txq_data;
> return 0;
> }
>
> @@ -372,7 +521,6 @@ cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
> int32_t tx_queue_id)
> {
> struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
> - struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
> struct roc_nix_sq *sq;
> int i, ret;
> void *txq;
> @@ -388,8 +536,6 @@ cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
> event_dev, eth_dev->data->port_id, tx_queue_id, txq);
> if (ret < 0)
> return ret;
> -
> - dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags;
> }
>
> return 0;
> diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c
> index 8378cbffc2..9bb08e1824 100644
> --- a/drivers/net/cnxk/cn10k_ethdev.c
> +++ b/drivers/net/cnxk/cn10k_ethdev.c
> @@ -131,53 +131,31 @@ static void
> nix_form_default_desc(struct cnxk_eth_dev *dev, struct cn10k_eth_txq *txq,
> uint16_t qid)
> {
> - struct nix_send_ext_s *send_hdr_ext;
> union nix_send_hdr_w0_u send_hdr_w0;
> - struct nix_send_mem_s *send_mem;
> - union nix_send_sg_s sg_w0;
> -
> - RTE_SET_USED(dev);
>
> /* Initialize the fields based on basic single segment packet */
> - memset(&txq->cmd, 0, sizeof(txq->cmd));
> send_hdr_w0.u = 0;
> - sg_w0.u = 0;
> -
> if (dev->tx_offload_flags & NIX_TX_NEED_EXT_HDR) {
> /* 2(HDR) + 2(EXT_HDR) + 1(SG) + 1(IOVA) = 6/2 - 1 = 2 */
> send_hdr_w0.sizem1 = 2;
> -
> - send_hdr_ext = (struct nix_send_ext_s *)&txq->cmd[0];
> - send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
> if (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSTAMP_F) {
> /* Default: one seg packet would have:
> * 2(HDR) + 2(EXT) + 1(SG) + 1(IOVA) + 2(MEM)
> * => 8/2 - 1 = 3
> */
> send_hdr_w0.sizem1 = 3;
> - send_hdr_ext->w0.tstmp = 1;
>
> /* To calculate the offset for send_mem,
> * send_hdr->w0.sizem1 * 2
> */
> - send_mem = (struct nix_send_mem_s *)(txq->cmd + 2);
> - send_mem->w0.subdc = NIX_SUBDC_MEM;
> - send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP;
> - send_mem->addr = dev->tstamp.tx_tstamp_iova;
> + txq->ts_mem = dev->tstamp.tx_tstamp_iova;
> }
> } else {
> /* 2(HDR) + 1(SG) + 1(IOVA) = 4/2 - 1 = 1 */
> send_hdr_w0.sizem1 = 1;
> }
> -
> send_hdr_w0.sq = qid;
> - sg_w0.subdc = NIX_SUBDC_SG;
> - sg_w0.segs = 1;
> - sg_w0.ld_type = NIX_SENDLDTYPE_LDD;
> -
> txq->send_hdr_w0 = send_hdr_w0.u;
> - txq->sg_w0 = sg_w0.u;
> -
> rte_wmb();
> }
>
> diff --git a/drivers/net/cnxk/cn10k_ethdev.h b/drivers/net/cnxk/cn10k_ethdev.h
> index 0982158c62..ec40e53152 100644
> --- a/drivers/net/cnxk/cn10k_ethdev.h
> +++ b/drivers/net/cnxk/cn10k_ethdev.h
> @@ -9,7 +9,6 @@
>
> struct cn10k_eth_txq {
> uint64_t send_hdr_w0;
> - uint64_t sg_w0;
> int64_t fc_cache_pkts;
> uint64_t *fc_mem;
> uintptr_t lmt_base;
> @@ -20,8 +19,8 @@ struct cn10k_eth_txq {
> uint64_t sa_base;
> uint64_t *cpt_fc;
> uint16_t cpt_desc;
> - uint64_t cmd[4];
> uint64_t lso_tun_fmt;
> + uint64_t ts_mem;
> } __plt_cache_aligned;
>
> struct cn10k_eth_rxq {
> diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
> index fc1f6ceb8c..4ae6bbf517 100644
> --- a/drivers/net/cnxk/cn10k_tx.h
> +++ b/drivers/net/cnxk/cn10k_tx.h
> @@ -186,23 +186,26 @@ cn10k_cpt_tx_steor_data(void)
> }
>
> static __rte_always_inline void
> -cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
> - const uint16_t flags)
> +cn10k_nix_tx_skeleton(struct cn10k_eth_txq *txq, uint64_t *cmd,
> + const uint16_t flags, const uint16_t static_sz)
> {
> - /* Send hdr */
> - cmd[0] = txq->send_hdr_w0;
> + if (static_sz)
> + cmd[0] = txq->send_hdr_w0;
> + else
> + cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
> + ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
> cmd[1] = 0;
> - cmd += 2;
>
> - /* Send ext if present */
> if (flags & NIX_TX_NEED_EXT_HDR) {
> - *(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
> - cmd += 2;
> + if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
> + cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
> + else
> + cmd[2] = NIX_SUBDC_EXT << 60;
> + cmd[3] = 0;
> + cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
> + } else {
> + cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
> }
> -
> - /* Send sg */
> - cmd[0] = txq->sg_w0;
> - cmd[1] = 0;
> }
>
> static __rte_always_inline void
> @@ -718,41 +721,29 @@ cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
> }
>
> static __rte_always_inline void
> -cn10k_nix_xmit_prepare_tstamp(uintptr_t lmt_addr, const uint64_t *cmd,
> +cn10k_nix_xmit_prepare_tstamp(struct cn10k_eth_txq *txq, uintptr_t lmt_addr,
> const uint64_t ol_flags, const uint16_t no_segdw,
> const uint16_t flags)
> {
> if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
> - const uint8_t is_ol_tstamp = !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
> - struct nix_send_ext_s *send_hdr_ext =
> - (struct nix_send_ext_s *)lmt_addr + 16;
> + const uint8_t is_ol_tstamp =
> + !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
> uint64_t *lmt = (uint64_t *)lmt_addr;
> uint16_t off = (no_segdw - 1) << 1;
> struct nix_send_mem_s *send_mem;
>
> send_mem = (struct nix_send_mem_s *)(lmt + off);
> - send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
> - send_hdr_ext->w0.tstmp = 1;
> - if (flags & NIX_TX_MULTI_SEG_F) {
> - /* Retrieving the default desc values */
> - lmt[off] = cmd[2];
> -
> - /* Using compiler barrier to avoid violation of C
> - * aliasing rules.
> - */
> - rte_compiler_barrier();
> - }
> -
> - /* Packets for which RTE_MBUF_F_TX_IEEE1588_TMST is not set, tx tstamp
> + /* Packets for which PKT_TX_IEEE1588_TMST is not set, tx tstamp
> * should not be recorded, hence changing the alg type to
> - * NIX_SENDMEMALG_SET and also changing send mem addr field to
> + * NIX_SENDMEMALG_SUB and also changing send mem addr field to
> * next 8 bytes as it corrupts the actual Tx tstamp registered
> * address.
> */
> send_mem->w0.subdc = NIX_SUBDC_MEM;
> - send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
> + send_mem->w0.alg =
> + NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);
> send_mem->addr =
> - (rte_iova_t)(((uint64_t *)cmd[3]) + is_ol_tstamp);
> + (rte_iova_t)(((uint64_t *)txq->ts_mem) + is_ol_tstamp);
> }
> }
>
> @@ -841,8 +832,8 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
> }
>
> static __rte_always_inline uint16_t
> -cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
> - uint64_t *cmd, uintptr_t base, const uint16_t flags)
> +cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
> + uint16_t pkts, uint64_t *cmd, const uint16_t flags)
> {
> struct cn10k_eth_txq *txq = tx_queue;
> const rte_iova_t io_addr = txq->io_addr;
> @@ -863,9 +854,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
> /* Reduce the cached count */
> txq->fc_cache_pkts -= pkts;
> }
> -
> /* Get cmd skeleton */
> - cn10k_nix_tx_skeleton(txq, cmd, flags);
> + cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));
>
> if (flags & NIX_TX_OFFLOAD_TSO_F)
> lso_tun_fmt = txq->lso_tun_fmt;
> @@ -909,14 +899,14 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
>
> /* Move NIX desc to LMT/NIXTX area */
> cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
> - cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
> - tx_pkts[i]->ol_flags, 4, flags);
> + cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
> + 4, flags);
> if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
> lnum++;
> }
>
> if (flags & NIX_TX_VWQE_F)
> - roc_sso_hws_head_wait(base);
> + roc_sso_hws_head_wait(ws[0]);
>
> left -= burst;
> tx_pkts += burst;
> @@ -967,9 +957,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
> }
>
> static __rte_always_inline uint16_t
> -cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
> - uint16_t pkts, uint64_t *cmd, uintptr_t base,
> - const uint16_t flags)
> +cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
> + struct rte_mbuf **tx_pkts, uint16_t pkts,
> + uint64_t *cmd, const uint16_t flags)
> {
> struct cn10k_eth_txq *txq = tx_queue;
> uintptr_t pa0, pa1, lbase = txq->lmt_base;
> @@ -987,12 +977,13 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
> uintptr_t laddr;
> bool sec;
>
> - NIX_XMIT_FC_OR_RETURN(txq, pkts);
> -
> - cn10k_nix_tx_skeleton(txq, cmd, flags);
> -
> - /* Reduce the cached count */
> - txq->fc_cache_pkts -= pkts;
> + if (!(flags & NIX_TX_VWQE_F)) {
> + NIX_XMIT_FC_OR_RETURN(txq, pkts);
> + /* Reduce the cached count */
> + txq->fc_cache_pkts -= pkts;
> + }
> + /* Get cmd skeleton */
> + cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));
>
> if (flags & NIX_TX_OFFLOAD_TSO_F)
> lso_tun_fmt = txq->lso_tun_fmt;
> @@ -1038,13 +1029,11 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
>
> /* Move NIX desc to LMT/NIXTX area */
> cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
> -
> /* Store sg list directly on lmt line */
> segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
> flags);
> - cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
> - tx_pkts[i]->ol_flags, segdw,
> - flags);
> + cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
> + segdw, flags);
> if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
> lnum++;
> data128 |= (((__uint128_t)(segdw - 1)) << shft);
> @@ -1053,7 +1042,7 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
> }
>
> if (flags & NIX_TX_VWQE_F)
> - roc_sso_hws_head_wait(base);
> + roc_sso_hws_head_wait(ws[0]);
>
> left -= burst;
> tx_pkts += burst;
> @@ -1474,9 +1463,9 @@ cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
> }
>
> static __rte_always_inline uint16_t
> -cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> - uint16_t pkts, uint64_t *cmd, uintptr_t base,
> - const uint16_t flags)
> +cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
> + struct rte_mbuf **tx_pkts, uint16_t pkts,
> + uint64_t *cmd, const uint16_t flags)
> {
> uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
> uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
> @@ -1526,25 +1515,42 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
> }
>
> - senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
> + if (!(flags & NIX_TX_VWQE_F)) {
> + senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
> + } else {
> + uint64_t w0 =
> + (txq->send_hdr_w0 & 0xFFFFF00000000000) |
> + ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
> +
> + senddesc01_w0 = vdupq_n_u64(w0);
> + }
> senddesc23_w0 = senddesc01_w0;
> +
> senddesc01_w1 = vdupq_n_u64(0);
> senddesc23_w1 = senddesc01_w1;
> - sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
> + sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
> sgdesc23_w0 = sgdesc01_w0;
>
> - /* Load command defaults into vector variables. */
> if (flags & NIX_TX_NEED_EXT_HDR) {
> - sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
> - sendext23_w0 = sendext01_w0;
> - sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
> - sendext23_w1 = sendext01_w1;
> if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
> - sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
> + sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
> + BIT_ULL(15));
> + sendmem01_w0 =
> + vdupq_n_u64((NIX_SUBDC_MEM << 60) |
> + (NIX_SENDMEMALG_SETTSTMP << 56));
> sendmem23_w0 = sendmem01_w0;
> - sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
> + sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
> sendmem23_w1 = sendmem01_w1;
> + } else {
> + sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
> }
> + sendext23_w0 = sendext01_w0;
> +
> + if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
> + sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
> + else
> + sendext01_w1 = vdupq_n_u64(0);
> + sendext23_w1 = sendext01_w1;
> }
>
> /* Get LMT base address and LMT ID as lcore id */
> @@ -2577,7 +2583,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> wd.data[0] >>= 16;
>
> if (flags & NIX_TX_VWQE_F)
> - roc_sso_hws_head_wait(base);
> + roc_sso_hws_head_wait(ws[0]);
>
> left -= burst;
>
> @@ -2640,12 +2646,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>
> if (unlikely(scalar)) {
> if (flags & NIX_TX_MULTI_SEG_F)
> - pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
> - scalar, cmd, base,
> - flags);
> + pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, ws, tx_pkts,
> + scalar, cmd, flags);
> else
> - pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
> - cmd, base, flags);
> + pkts += cn10k_nix_xmit_pkts(tx_queue, ws, tx_pkts,
> + scalar, cmd, flags);
> }
>
> return pkts;
> @@ -2653,16 +2658,16 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>
> #else
> static __rte_always_inline uint16_t
> -cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> - uint16_t pkts, uint64_t *cmd, uintptr_t base,
> - const uint16_t flags)
> +cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
> + struct rte_mbuf **tx_pkts, uint16_t pkts,
> + uint64_t *cmd, const uint16_t flags)
> {
> + RTE_SET_USED(ws);
> RTE_SET_USED(tx_queue);
> RTE_SET_USED(tx_pkts);
> RTE_SET_USED(pkts);
> RTE_SET_USED(cmd);
> RTE_SET_USED(flags);
> - RTE_SET_USED(base);
> return 0;
> }
> #endif
> @@ -2892,7 +2897,7 @@ NIX_TX_FASTPATH_MODES
> if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
> !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
> return 0; \
> - return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, 0, \
> + return cn10k_nix_xmit_pkts(tx_queue, NULL, tx_pkts, pkts, cmd, \
> flags); \
> }
>
> @@ -2905,8 +2910,8 @@ NIX_TX_FASTPATH_MODES
> if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
> !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
> return 0; \
> - return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \
> - 0, \
> + return cn10k_nix_xmit_pkts_mseg(tx_queue, NULL, tx_pkts, pkts, \
> + cmd, \
> flags | NIX_TX_MULTI_SEG_F); \
> }
>
> @@ -2919,8 +2924,8 @@ NIX_TX_FASTPATH_MODES
> if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \
> !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
> return 0; \
> - return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, \
> - cmd, 0, (flags)); \
> + return cn10k_nix_xmit_pkts_vector(tx_queue, NULL, tx_pkts, \
> + pkts, cmd, (flags)); \
> }
>
> #define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags) \
> @@ -2933,7 +2938,7 @@ NIX_TX_FASTPATH_MODES
> !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \
> return 0; \
> return cn10k_nix_xmit_pkts_vector( \
> - tx_queue, tx_pkts, pkts, cmd, 0, \
> + tx_queue, NULL, tx_pkts, pkts, cmd, \
> (flags) | NIX_TX_MULTI_SEG_F); \
> }
>
> diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c
> index d34bc6898f..01e3850561 100644
> --- a/drivers/net/cnxk/cn9k_ethdev.c
> +++ b/drivers/net/cnxk/cn9k_ethdev.c
> @@ -131,51 +131,31 @@ static void
> nix_form_default_desc(struct cnxk_eth_dev *dev, struct cn9k_eth_txq *txq,
> uint16_t qid)
> {
> - struct nix_send_ext_s *send_hdr_ext;
> - struct nix_send_hdr_s *send_hdr;
> - struct nix_send_mem_s *send_mem;
> - union nix_send_sg_s *sg;
> + union nix_send_hdr_w0_u send_hdr_w0;
>
> /* Initialize the fields based on basic single segment packet */
> - memset(&txq->cmd, 0, sizeof(txq->cmd));
> -
> + send_hdr_w0.u = 0;
> if (dev->tx_offload_flags & NIX_TX_NEED_EXT_HDR) {
> - send_hdr = (struct nix_send_hdr_s *)&txq->cmd[0];
> /* 2(HDR) + 2(EXT_HDR) + 1(SG) + 1(IOVA) = 6/2 - 1 = 2 */
> - send_hdr->w0.sizem1 = 2;
> -
> - send_hdr_ext = (struct nix_send_ext_s *)&txq->cmd[2];
> - send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
> + send_hdr_w0.sizem1 = 2;
> if (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSTAMP_F) {
> /* Default: one seg packet would have:
> * 2(HDR) + 2(EXT) + 1(SG) + 1(IOVA) + 2(MEM)
> * => 8/2 - 1 = 3
> */
> - send_hdr->w0.sizem1 = 3;
> - send_hdr_ext->w0.tstmp = 1;
> + send_hdr_w0.sizem1 = 3;
>
> /* To calculate the offset for send_mem,
> * send_hdr->w0.sizem1 * 2
> */
> - send_mem = (struct nix_send_mem_s *)
> - (txq->cmd + (send_hdr->w0.sizem1 << 1));
> - send_mem->w0.cn9k.subdc = NIX_SUBDC_MEM;
> - send_mem->w0.cn9k.alg = NIX_SENDMEMALG_SETTSTMP;
> - send_mem->addr = dev->tstamp.tx_tstamp_iova;
> + txq->ts_mem = dev->tstamp.tx_tstamp_iova;
> }
> - sg = (union nix_send_sg_s *)&txq->cmd[4];
> } else {
> - send_hdr = (struct nix_send_hdr_s *)&txq->cmd[0];
> /* 2(HDR) + 1(SG) + 1(IOVA) = 4/2 - 1 = 1 */
> - send_hdr->w0.sizem1 = 1;
> - sg = (union nix_send_sg_s *)&txq->cmd[2];
> + send_hdr_w0.sizem1 = 1;
> }
> -
> - send_hdr->w0.sq = qid;
> - sg->subdc = NIX_SUBDC_SG;
> - sg->segs = 1;
> - sg->ld_type = NIX_SENDLDTYPE_LDD;
> -
> + send_hdr_w0.sq = qid;
> + txq->send_hdr_w0 = send_hdr_w0.u;
> rte_wmb();
> }
>
> diff --git a/drivers/net/cnxk/cn9k_ethdev.h b/drivers/net/cnxk/cn9k_ethdev.h
> index 2b452fe009..8ab924944c 100644
> --- a/drivers/net/cnxk/cn9k_ethdev.h
> +++ b/drivers/net/cnxk/cn9k_ethdev.h
> @@ -9,12 +9,13 @@
> #include <cnxk_security_ar.h>
>
> struct cn9k_eth_txq {
> - uint64_t cmd[8];
> + uint64_t send_hdr_w0;
> int64_t fc_cache_pkts;
> uint64_t *fc_mem;
> void *lmt_addr;
> rte_iova_t io_addr;
> uint64_t lso_tun_fmt;
> + uint64_t ts_mem;
> uint16_t sqes_per_sqb_log2;
> int16_t nb_sqb_bufs_adj;
> rte_iova_t cpt_io_addr;
> diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
> index 8564dd85ee..d23e4b61b4 100644
> --- a/drivers/net/cnxk/cn9k_tx.h
> +++ b/drivers/net/cnxk/cn9k_tx.h
> @@ -58,6 +58,29 @@ cn9k_nix_tx_ext_subs(const uint16_t flags)
> : 0);
> }
>
> +static __rte_always_inline void
> +cn9k_nix_tx_skeleton(struct cn9k_eth_txq *txq, uint64_t *cmd,
> + const uint16_t flags, const uint16_t static_sz)
> +{
> + if (static_sz)
> + cmd[0] = txq->send_hdr_w0;
> + else
> + cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
> + ((uint64_t)(cn9k_nix_tx_ext_subs(flags) + 1) << 40);
> + cmd[1] = 0;
> +
> + if (flags & NIX_TX_NEED_EXT_HDR) {
> + if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
> + cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
> + else
> + cmd[2] = NIX_SUBDC_EXT << 60;
> + cmd[3] = 0;
> + cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
> + } else {
> + cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
> + }
> +}
> +
> static __rte_always_inline void
> cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
> {
> @@ -136,11 +159,11 @@ cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
> w1.u = 0;
> }
>
> - if (!(flags & NIX_TX_MULTI_SEG_F)) {
> + if (!(flags & NIX_TX_MULTI_SEG_F))
> send_hdr->w0.total = m->data_len;
> - send_hdr->w0.aura =
> - roc_npa_aura_handle_to_aura(m->pool->pool_id);
> - }
> + else
> + send_hdr->w0.total = m->pkt_len;
> + send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
>
> /*
> * L3type: 2 => IPV4
> @@ -287,41 +310,39 @@ cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
> /* Mark mempool object as "put" since it is freed by NIX */
> if (!send_hdr->w0.df)
> RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
> + } else {
> + sg->seg1_size = m->data_len;
> + *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);
> +
> + /* NOFF is handled later for multi-seg */
> }
> }
>
> static __rte_always_inline void
> -cn9k_nix_xmit_prepare_tstamp(uint64_t *cmd, const uint64_t *send_mem_desc,
> +cn9k_nix_xmit_prepare_tstamp(struct cn9k_eth_txq *txq, uint64_t *cmd,
> const uint64_t ol_flags, const uint16_t no_segdw,
> const uint16_t flags)
> {
> if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
> struct nix_send_mem_s *send_mem;
> uint16_t off = (no_segdw - 1) << 1;
> - const uint8_t is_ol_tstamp = !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
> + const uint8_t is_ol_tstamp =
> + !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
>
> send_mem = (struct nix_send_mem_s *)(cmd + off);
> - if (flags & NIX_TX_MULTI_SEG_F) {
> - /* Retrieving the default desc values */
> - cmd[off] = send_mem_desc[6];
>
> - /* Using compiler barrier to avoid violation of C
> - * aliasing rules.
> - */
> - rte_compiler_barrier();
> - }
> -
> - /* Packets for which RTE_MBUF_F_TX_IEEE1588_TMST is not set, tx tstamp
> + /* Packets for which PKT_TX_IEEE1588_TMST is not set, tx tstamp
> * should not be recorded, hence changing the alg type to
> - * NIX_SENDMEMALG_SET and also changing send mem addr field to
> + * NIX_SENDMEMALG_SUB and also changing send mem addr field to
> * next 8 bytes as it corrupts the actual Tx tstamp registered
> * address.
> */
> + send_mem->w0.cn9k.subdc = NIX_SUBDC_MEM;
> send_mem->w0.cn9k.alg =
> - NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
> + NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);
>
> - send_mem->addr = (rte_iova_t)((uint64_t *)send_mem_desc[7] +
> - (is_ol_tstamp));
> + send_mem->addr = (rte_iova_t)(((uint64_t *)txq->ts_mem) +
> + (is_ol_tstamp));
> }
> }
>
> @@ -367,8 +388,6 @@ cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
> uint8_t off, i;
>
> send_hdr = (struct nix_send_hdr_s *)cmd;
> - send_hdr->w0.total = m->pkt_len;
> - send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
>
> if (flags & NIX_TX_NEED_EXT_HDR)
> off = 2;
> @@ -376,13 +395,29 @@ cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
> off = 0;
>
> sg = (union nix_send_sg_s *)&cmd[2 + off];
> - /* Clear sg->u header before use */
> - sg->u &= 0xFC00000000000000;
> +
> + /* Start from second segment, first segment is already there */
> + i = 1;
> sg_u = sg->u;
> - slist = &cmd[3 + off];
> + nb_segs = m->nb_segs - 1;
> + m_next = m->next;
> + slist = &cmd[3 + off + 1];
>
> - i = 0;
> - nb_segs = m->nb_segs;
> + /* Set invert df if buffer is not to be freed by H/W */
> + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
> + sg_u |= (cnxk_nix_prefree_seg(m) << 55);
> + rte_io_wmb();
> + }
> +
> + /* Mark mempool object as "put" since it is freed by NIX */
> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> + if (!(sg_u & (1ULL << 55)))
> + RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
> + rte_io_wmb();
> +#endif
> + m = m_next;
> + if (!m)
> + goto done;
>
> /* Fill mbuf segments */
> do {
> @@ -417,6 +452,7 @@ cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
> m = m_next;
> } while (nb_segs);
>
> +done:
> sg->u = sg_u;
> sg->segs = i;
> segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
> @@ -472,7 +508,7 @@ cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
>
> NIX_XMIT_FC_OR_RETURN(txq, pkts);
>
> - roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));
> + cn9k_nix_tx_skeleton(txq, cmd, flags, 1);
>
> /* Perform header writes before barrier for TSO */
> if (flags & NIX_TX_OFFLOAD_TSO_F) {
> @@ -490,8 +526,8 @@ cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
>
> for (i = 0; i < pkts; i++) {
> cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
> - cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
> - tx_pkts[i]->ol_flags, 4, flags);
> + cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags, 4,
> + flags);
> cn9k_nix_xmit_one(cmd, lmt_addr, io_addr, flags);
> }
>
> @@ -514,7 +550,7 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
>
> NIX_XMIT_FC_OR_RETURN(txq, pkts);
>
> - roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));
> + cn9k_nix_tx_skeleton(txq, cmd, flags, 1);
>
> /* Perform header writes before barrier for TSO */
> if (flags & NIX_TX_OFFLOAD_TSO_F) {
> @@ -533,9 +569,8 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
> for (i = 0; i < pkts; i++) {
> cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
> segdw = cn9k_nix_prepare_mseg(tx_pkts[i], cmd, flags);
> - cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
> - tx_pkts[i]->ol_flags, segdw,
> - flags);
> + cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags,
> + segdw, flags);
> cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
> }
>
> @@ -862,28 +897,34 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
> rte_io_wmb();
>
> - senddesc01_w0 = vld1q_dup_u64(&txq->cmd[0]);
> + senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
> senddesc23_w0 = senddesc01_w0;
> +
> senddesc01_w1 = vdupq_n_u64(0);
> senddesc23_w1 = senddesc01_w1;
> + sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
> + sgdesc23_w0 = sgdesc01_w0;
>
> - /* Load command defaults into vector variables. */
> if (flags & NIX_TX_NEED_EXT_HDR) {
> - sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]);
> - sendext23_w0 = sendext01_w0;
> - sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
> - sendext23_w1 = sendext01_w1;
> - sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]);
> - sgdesc23_w0 = sgdesc01_w0;
> if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
> - sendmem01_w0 = vld1q_dup_u64(&txq->cmd[6]);
> + sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
> + BIT_ULL(15));
> + sendmem01_w0 =
> + vdupq_n_u64((NIX_SUBDC_MEM << 60) |
> + (NIX_SENDMEMALG_SETTSTMP << 56));
> sendmem23_w0 = sendmem01_w0;
> - sendmem01_w1 = vld1q_dup_u64(&txq->cmd[7]);
> + sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
> sendmem23_w1 = sendmem01_w1;
> + } else {
> + sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
> }
> - } else {
> - sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
> - sgdesc23_w0 = sgdesc01_w0;
> + sendext23_w0 = sendext01_w0;
> +
> + if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
> + sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
> + else
> + sendext01_w1 = vdupq_n_u64(0);
> + sendext23_w1 = sendext01_w1;
> }
>
> for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
> --
> 2.17.1
>
next prev parent reply other threads:[~2022-02-11 10:27 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-19 7:13 [PATCH v2 1/4] " pbhagavatula
2022-01-19 7:13 ` [PATCH v2 2/4] event/cnxk: store and reuse workslot status pbhagavatula
2022-01-19 7:13 ` [PATCH v2 3/4] event/cnxk: disable default wait time for dequeue pbhagavatula
2022-01-19 7:13 ` [PATCH v2 4/4] net/cnxk: improve Rx performance pbhagavatula
2022-02-07 14:03 ` [PATCH v2 1/4] net/cnxk: avoid command copy from Tx queue Jerin Jacob
2022-02-10 10:13 ` [PATCH v3] " pbhagavatula
2022-02-10 10:19 ` Jerin Jacob
2022-02-10 13:15 ` [PATCH v4] " pbhagavatula
2022-02-11 10:27 ` Jerin Jacob [this message]
2022-02-10 10:19 ` [PATCH v3 1/3] event/cnxk: store and reuse workslot status pbhagavatula
2022-02-10 10:19 ` [PATCH v3 2/3] event/cnxk: disable default wait time for dequeue pbhagavatula
2022-02-10 10:19 ` [PATCH v3 3/3] net/cnxk: improve Rx performance pbhagavatula
2022-02-10 13:20 ` [PATCH v4 1/3] event/cnxk: store and reuse workslot status pbhagavatula
2022-02-10 13:20 ` [PATCH v4 2/3] event/cnxk: disable default wait time for dequeue pbhagavatula
2022-02-10 13:20 ` [PATCH v4 3/3] event/cnxk: improve Rx performance pbhagavatula
2022-02-14 9:29 ` [PATCH v4 1/3] event/cnxk: store and reuse workslot status Jerin Jacob
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CALBAE1OQd2L4WpOZXCCnbPtX_i8apcTwEfLH=BatnYJtGvsNVw@mail.gmail.com' \
--to=jerinjacobk@gmail.com \
--cc=adwivedi@marvell.com \
--cc=anoobj@marvell.com \
--cc=dev@dpdk.org \
--cc=ferruh.yigit@intel.com \
--cc=jerinj@marvell.com \
--cc=kirankumark@marvell.com \
--cc=ktejasree@marvell.com \
--cc=ndabilpuram@marvell.com \
--cc=pbhagavatula@marvell.com \
--cc=skori@marvell.com \
--cc=skoteshwar@marvell.com \
--cc=sthotton@marvell.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).