* [dpdk-dev] [PATCH 1/3] event/cnxk: fix packet Tx overflow
From: pbhagavatula @ 2021-10-04 8:37 UTC
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
The transmit loop incorrectly assumes that nb_mbufs is always
a multiple of 4 when transmitting an event vector. A vector may
be pushed out before reaching its maximum size due to a timeout,
in which case the loop reads past the end of the mbuf array.
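A minimal standalone sketch of the loop split applied below; tx_vec4()
and tx_one() are hypothetical stand-ins for the driver's
cn10k_nix_xmit_pkts_vector() and cn10k_sso_tx_one() paths, and
DESCS_PER_LOOP mirrors NIX_DESCS_PER_LOOP (4 in this path):

#include <stdint.h>
#include <stdio.h>

#define DESCS_PER_LOOP 4 /* vector width assumed by the Tx loop */

/* Hypothetical stand-ins for the vector and scalar Tx paths. */
static void tx_vec4(const int *p) { printf("vec4: %d..%d\n", p[0], p[3]); }
static void tx_one(int p) { printf("one: %d\n", p); }

static void
tx_burst(const int *pkts, uint16_t nb)
{
	uint16_t tail = nb & (DESCS_PER_LOOP - 1); /* 0..3 leftover pkts */
	uint16_t nb_vec = nb - tail; /* i.e. RTE_ALIGN_FLOOR(nb, 4) */
	uint16_t i;

	for (i = 0; i < nb_vec; i += DESCS_PER_LOOP)
		tx_vec4(&pkts[i]); /* full groups of four only */
	for (i = 0; i < tail; i++)
		tx_one(pkts[nb_vec + i]); /* scalar tail, no overrun */
}

int
main(void)
{
	int pkts[7] = {0, 1, 2, 3, 4, 5, 6};

	tx_burst(pkts, 7); /* 7 = 4 vectorized + 3 scalar */
	return 0;
}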
Fixes: 761a321acf91 ("event/cnxk: support vectorized Tx event fast path")
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
Depends-on: series-18614 ("add SSO XAQ pool create and free")
drivers/event/cnxk/cn10k_worker.h | 180 +++++++++++++-----------------
1 file changed, 77 insertions(+), 103 deletions(-)
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 1255662b6c..657ab91ac8 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -7,10 +7,10 @@
#include <rte_vect.h>
+#include "cn10k_cryptodev_ops.h"
#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
-#include "cn10k_cryptodev_ops.h"
#include "cn10k_ethdev.h"
#include "cn10k_rx.h"
@@ -237,18 +237,16 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
cq_w1 = *(uint64_t *)(gw.u64[1] + 8);
- sa_base = cnxk_nix_sa_base_get(port,
- lookup_mem);
+ sa_base =
+ cnxk_nix_sa_base_get(port, lookup_mem);
sa_base &= ~(ROC_NIX_INL_SA_BASE_ALIGN - 1);
- mbuf = (uint64_t)nix_sec_meta_to_mbuf_sc(cq_w1,
- sa_base, (uintptr_t)&iova,
- &loff, (struct rte_mbuf *)mbuf,
- d_off);
+ mbuf = (uint64_t)nix_sec_meta_to_mbuf_sc(
+ cq_w1, sa_base, (uintptr_t)&iova, &loff,
+ (struct rte_mbuf *)mbuf, d_off);
if (loff)
roc_npa_aura_op_free(m->pool->pool_id,
0, iova);
-
}
gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
@@ -396,6 +394,56 @@ cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
}
+static __rte_always_inline void
+cn10k_sso_tx_one(struct rte_mbuf *m, uint64_t *cmd, uint16_t lmt_id,
+ uintptr_t lmt_addr, uint8_t sched_type, uintptr_t base,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+ const uint32_t flags)
+{
+ uint8_t lnum = 0, loff = 0, shft = 0;
+ struct cn10k_eth_txq *txq;
+ uintptr_t laddr;
+ uint16_t segdw;
+ uintptr_t pa;
+ bool sec;
+
+ txq = cn10k_sso_hws_xtract_meta(m, txq_data);
+ cn10k_nix_tx_skeleton(txq, cmd, flags);
+ /* Perform header writes before barrier
+ * for TSO
+ */
+ if (flags & NIX_TX_OFFLOAD_TSO_F)
+ cn10k_nix_xmit_prepare_tso(m, flags);
+
+ cn10k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt, &sec);
+
+ laddr = lmt_addr;
+ /* Prepare CPT instruction and get nixtx addr if
+ * it is for CPT on same lmtline.
+ */
+ if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+ cn10k_nix_prep_sec(m, cmd, &laddr, lmt_addr, &lnum, &loff,
+ &shft, txq->sa_base, flags);
+
+ /* Move NIX desc to LMT/NIXTX area */
+ cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
+
+ if (flags & NIX_TX_MULTI_SEG_F)
+ segdw = cn10k_nix_prepare_mseg(m, (uint64_t *)laddr, flags);
+ else
+ segdw = cn10k_nix_tx_ext_subs(flags) + 2;
+
+ if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
+ pa = txq->cpt_io_addr | 3 << 4;
+ else
+ pa = txq->io_addr | ((segdw - 1) << 4);
+
+ if (!sched_type)
+ roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+
+ roc_lmt_submit_steorl(lmt_id, pa);
+}
+
static __rte_always_inline void
cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr,
@@ -404,11 +452,13 @@ cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
const uint32_t flags)
{
uint16_t port[4], queue[4];
+ uint16_t i, j, pkts, scalar;
struct cn10k_eth_txq *txq;
- uint16_t i, j;
- uintptr_t pa;
- for (i = 0; i < nb_mbufs; i += 4) {
+ scalar = nb_mbufs & (NIX_DESCS_PER_LOOP - 1);
+ pkts = RTE_ALIGN_FLOOR(nb_mbufs, NIX_DESCS_PER_LOOP);
+
+ for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
port[0] = mbufs[i]->port;
port[1] = mbufs[i + 1]->port;
port[2] = mbufs[i + 2]->port;
@@ -421,66 +471,25 @@ cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
if (((port[0] ^ port[1]) & (port[2] ^ port[3])) ||
((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) {
-
- for (j = 0; j < 4; j++) {
- uint8_t lnum = 0, loff = 0, shft = 0;
- struct rte_mbuf *m = mbufs[i + j];
- uintptr_t laddr;
- uint16_t segdw;
- bool sec;
-
- txq = (struct cn10k_eth_txq *)
- txq_data[port[j]][queue[j]];
- cn10k_nix_tx_skeleton(txq, cmd, flags);
- /* Perform header writes before barrier
- * for TSO
- */
- if (flags & NIX_TX_OFFLOAD_TSO_F)
- cn10k_nix_xmit_prepare_tso(m, flags);
-
- cn10k_nix_xmit_prepare(m, cmd, flags,
- txq->lso_tun_fmt, &sec);
-
- laddr = lmt_addr;
- /* Prepare CPT instruction and get nixtx addr if
- * it is for CPT on same lmtline.
- */
- if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
- cn10k_nix_prep_sec(m, cmd, &laddr,
- lmt_addr, &lnum,
- &loff, &shft,
- txq->sa_base, flags);
-
- /* Move NIX desc to LMT/NIXTX area */
- cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
-
- if (flags & NIX_TX_MULTI_SEG_F) {
- segdw = cn10k_nix_prepare_mseg(m,
- (uint64_t *)laddr, flags);
- } else {
- segdw = cn10k_nix_tx_ext_subs(flags) +
- 2;
- }
-
- if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
- pa = txq->cpt_io_addr | 3 << 4;
- else
- pa = txq->io_addr | ((segdw - 1) << 4);
-
- if (!sched_type)
- roc_sso_hws_head_wait(base +
- SSOW_LF_GWS_TAG);
-
- roc_lmt_submit_steorl(lmt_id, pa);
- }
+ for (j = 0; j < 4; j++)
+ cn10k_sso_tx_one(mbufs[i + j], cmd, lmt_id,
+ lmt_addr, sched_type, base,
+ txq_data, flags);
} else {
txq = (struct cn10k_eth_txq *)
txq_data[port[0]][queue[0]];
- cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd, base
- + SSOW_LF_GWS_TAG,
+ cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd,
+ base + SSOW_LF_GWS_TAG,
flags | NIX_TX_VWQE_F);
}
}
+
+ mbufs += i;
+
+ for (i = 0; i < scalar; i++) {
+ cn10k_sso_tx_one(mbufs[i], cmd, lmt_id, lmt_addr, sched_type,
+ base, txq_data, flags);
+ }
}
static __rte_always_inline uint16_t
@@ -489,19 +498,14 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
const uint32_t flags)
{
- uint8_t lnum = 0, loff = 0, shft = 0;
struct cn10k_eth_txq *txq;
- uint16_t ref_cnt, segdw;
struct rte_mbuf *m;
uintptr_t lmt_addr;
- uintptr_t c_laddr;
+ uint16_t ref_cnt;
uint16_t lmt_id;
- uintptr_t pa;
- bool sec;
lmt_addr = ws->lmt_base;
ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
- c_laddr = lmt_addr;
if (ev->event_type & RTE_EVENT_TYPE_VECTOR) {
struct rte_mbuf **mbufs = ev->vec->mbufs;
@@ -526,38 +530,8 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
m = ev->mbuf;
ref_cnt = m->refcnt;
- txq = cn10k_sso_hws_xtract_meta(m, txq_data);
- cn10k_nix_tx_skeleton(txq, cmd, flags);
- /* Perform header writes before barrier for TSO */
- if (flags & NIX_TX_OFFLOAD_TSO_F)
- cn10k_nix_xmit_prepare_tso(m, flags);
-
- cn10k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt, &sec);
-
- /* Prepare CPT instruction and get nixtx addr if
- * it is for CPT on same lmtline.
- */
- if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
- cn10k_nix_prep_sec(m, cmd, &lmt_addr, c_laddr, &lnum, &loff,
- &shft, txq->sa_base, flags);
-
- /* Move NIX desc to LMT/NIXTX area */
- cn10k_nix_xmit_mv_lmt_base(lmt_addr, cmd, flags);
- if (flags & NIX_TX_MULTI_SEG_F) {
- segdw = cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags);
- } else {
- segdw = cn10k_nix_tx_ext_subs(flags) + 2;
- }
-
- if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
- pa = txq->cpt_io_addr | 3 << 4;
- else
- pa = txq->io_addr | ((segdw - 1) << 4);
-
- if (!ev->sched_type)
- roc_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
-
- roc_lmt_submit_steorl(lmt_id, pa);
+ cn10k_sso_tx_one(m, cmd, lmt_id, lmt_addr, ev->sched_type, ws->tx_base,
+ txq_data, flags);
if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
if (ref_cnt > 1)
--
2.33.0
* [dpdk-dev] [PATCH 2/3] event/cnxk: reduce workslot memory consumption
From: pbhagavatula @ 2021-10-04 8:37 UTC
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
SSO group base addresses are always contiguous, so we need not
store all of the base addresses in workslot memory; instead, store
only the first group's base address and compute each group's
address offset when required.
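A minimal sketch of the change, assuming the 4 KiB per-HWGRP window
stride implied by the (grp << 12) arithmetic below; sso_grp_addr() is
an illustrative helper, not a driver symbol:

#include <stdint.h>

#define SSO_GRP_WIN_SHIFT 12 /* per-HWGRP window stride: 4 KiB (assumed) */

/* Before: uintptr_t grps_base[CNXK_SSO_MAX_HWGRP] copied into every
 * workslot. After: one uintptr_t grp_base per workslot; each group's
 * address is derived on demand.
 */
static inline uintptr_t
sso_grp_addr(uintptr_t grp_base, uint16_t grp)
{
	return grp_base + ((uintptr_t)grp << SSO_GRP_WIN_SHIFT);
}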
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/event/cnxk/cn10k_eventdev.c | 5 ++---
drivers/event/cnxk/cn10k_worker.h | 3 ++-
drivers/event/cnxk/cn9k_eventdev.c | 8 +++-----
drivers/event/cnxk/cn9k_worker.h | 6 ++++--
drivers/event/cnxk/cnxk_eventdev.c | 15 ++++++---------
drivers/event/cnxk/cnxk_eventdev.h | 8 ++++----
6 files changed, 21 insertions(+), 24 deletions(-)
diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index c2729a2c48..49bdd14208 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -91,14 +91,13 @@ cn10k_sso_hws_unlink(void *arg, void *port, uint16_t *map, uint16_t nb_link)
}
static void
-cn10k_sso_hws_setup(void *arg, void *hws, uintptr_t *grps_base)
+cn10k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
{
struct cnxk_sso_evdev *dev = arg;
struct cn10k_sso_hws *ws = hws;
uint64_t val;
- rte_memcpy(ws->grps_base, grps_base,
- sizeof(uintptr_t) * CNXK_SSO_MAX_HWGRP);
+ ws->grp_base = grp_base;
ws->fc_mem = (uint64_t *)dev->fc_iova;
ws->xaq_lmt = dev->xaq_lmt;
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 657ab91ac8..f8331e88d7 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -30,7 +30,8 @@ cn10k_sso_hws_new_event(struct cn10k_sso_hws *ws, const struct rte_event *ev)
if (ws->xaq_lmt <= *ws->fc_mem)
return 0;
- cnxk_sso_hws_add_work(event_ptr, tag, new_tt, ws->grps_base[grp]);
+ cnxk_sso_hws_add_work(event_ptr, tag, new_tt,
+ ws->grp_base + (grp << 12));
return 1;
}
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 3a20b099ae..9886720310 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -87,7 +87,7 @@ cn9k_sso_hws_unlink(void *arg, void *port, uint16_t *map, uint16_t nb_link)
}
static void
-cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t *grps_base)
+cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
{
struct cnxk_sso_evdev *dev = arg;
struct cn9k_sso_hws_dual *dws;
@@ -98,8 +98,7 @@ cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t *grps_base)
val = NSEC2USEC(dev->deq_tmo_ns) - 1;
if (dev->dual_ws) {
dws = hws;
- rte_memcpy(dws->grps_base, grps_base,
- sizeof(uintptr_t) * CNXK_SSO_MAX_HWGRP);
+ dws->grp_base = grp_base;
dws->fc_mem = (uint64_t *)dev->fc_iova;
dws->xaq_lmt = dev->xaq_lmt;
@@ -107,8 +106,7 @@ cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t *grps_base)
plt_write64(val, dws->base[1] + SSOW_LF_GWS_NW_TIM);
} else {
ws = hws;
- rte_memcpy(ws->grps_base, grps_base,
- sizeof(uintptr_t) * CNXK_SSO_MAX_HWGRP);
+ ws->grp_base = grp_base;
ws->fc_mem = (uint64_t *)dev->fc_iova;
ws->xaq_lmt = dev->xaq_lmt;
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 6be9be0b47..320e39da7b 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -31,7 +31,8 @@ cn9k_sso_hws_new_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
if (ws->xaq_lmt <= *ws->fc_mem)
return 0;
- cnxk_sso_hws_add_work(event_ptr, tag, new_tt, ws->grps_base[grp]);
+ cnxk_sso_hws_add_work(event_ptr, tag, new_tt,
+ ws->grp_base + (grp << 12));
return 1;
}
@@ -108,7 +109,8 @@ cn9k_sso_hws_dual_new_event(struct cn9k_sso_hws_dual *dws,
if (dws->xaq_lmt <= *dws->fc_mem)
return 0;
- cnxk_sso_hws_add_work(event_ptr, tag, new_tt, dws->grps_base[grp]);
+ cnxk_sso_hws_add_work(event_ptr, tag, new_tt,
+ dws->grp_base + (grp << 12));
return 1;
}
diff --git a/drivers/event/cnxk/cnxk_eventdev.c b/drivers/event/cnxk/cnxk_eventdev.c
index 84bf8cb6d1..c127034d37 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -332,8 +332,7 @@ cnxk_sso_port_setup(struct rte_eventdev *event_dev, uint8_t port_id,
cnxk_sso_hws_setup_t hws_setup_fn)
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
- uintptr_t grps_base[CNXK_SSO_MAX_HWGRP] = {0};
- uint16_t q;
+ uintptr_t grp_base = 0;
plt_sso_dbg("Port=%d", port_id);
if (event_dev->data->ports[port_id] == NULL) {
@@ -341,15 +340,13 @@ cnxk_sso_port_setup(struct rte_eventdev *event_dev, uint8_t port_id,
return -EINVAL;
}
- for (q = 0; q < dev->nb_event_queues; q++) {
- grps_base[q] = roc_sso_hwgrp_base_get(&dev->sso, q);
- if (grps_base[q] == 0) {
- plt_err("Failed to get grp[%d] base addr", q);
- return -EINVAL;
- }
+ grp_base = roc_sso_hwgrp_base_get(&dev->sso, 0);
+ if (grp_base == 0) {
+ plt_err("Failed to get grp base addr");
+ return -EINVAL;
}
- hws_setup_fn(dev, event_dev->data->ports[port_id], grps_base);
+ hws_setup_fn(dev, event_dev->data->ports[port_id], grp_base);
plt_sso_dbg("Port=%d ws=%p", port_id, event_dev->data->ports[port_id]);
rte_mb();
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index ccd09b1d82..ce9f965691 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -61,7 +61,7 @@
} while (0)
typedef void *(*cnxk_sso_init_hws_mem_t)(void *dev, uint8_t port_id);
-typedef void (*cnxk_sso_hws_setup_t)(void *dev, void *ws, uintptr_t *grp_base);
+typedef void (*cnxk_sso_hws_setup_t)(void *dev, void *ws, uintptr_t grp_base);
typedef void (*cnxk_sso_hws_release_t)(void *dev, void *ws);
typedef int (*cnxk_sso_link_t)(void *dev, void *ws, uint16_t *map,
uint16_t nb_link);
@@ -129,7 +129,7 @@ struct cn10k_sso_hws {
/* Add Work Fastpath data */
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
- uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
+ uintptr_t grp_base;
/* Tx Fastpath data */
uint64_t tx_base __rte_cache_aligned;
uintptr_t lmt_base;
@@ -157,7 +157,7 @@ struct cn9k_sso_hws {
/* Add Work Fastpath data */
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
- uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
+ uintptr_t grp_base;
/* Tx Fastpath data */
uint64_t base __rte_cache_aligned;
uint8_t tx_adptr_data[];
@@ -179,7 +179,7 @@ struct cn9k_sso_hws_dual {
/* Add Work Fastpath data */
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
- uintptr_t grps_base[CNXK_SSO_MAX_HWGRP];
+ uintptr_t grp_base;
/* Tx Fastpath data */
uint64_t base[2] __rte_cache_aligned;
uint8_t tx_adptr_data[];
--
2.33.0
* [dpdk-dev] [PATCH 3/3] event/cnxk: rework enqueue path
From: pbhagavatula @ 2021-10-04 8:37 UTC
To: jerinj, Pavan Nikhilesh, Shijith Thotton; +Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Rework the SSO enqueue path for CN9K to make it similar to the
CN10K enqueue interface.
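A sketch of the idea with illustrative register offsets (the real
values come from the SSOW_LF_GWS_* definitions): instead of caching one
pointer per operation, each workslot keeps a single base address and
adds the offset at the call site.

#include <stdint.h>

/* Illustrative offsets only; the driver uses SSOW_LF_GWS_TAG etc. */
#define GWS_TAG 0x300
#define GWS_WQP 0x308

static inline uint64_t
rd64(uintptr_t addr)
{
	return *(volatile uint64_t *)addr;
}

/* Before: six cached pointers (tag_op, wqp_op, ...) per workslot.
 * After: one base per workslot; offsets are applied where needed.
 */
static inline uint64_t
gws_tag(uintptr_t base)
{
	return rd64(base + GWS_TAG);
}

static inline uint64_t
gws_wqp(uintptr_t base)
{
	return rd64(base + GWS_WQP);
}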
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/event/cnxk/cn9k_eventdev.c | 28 ++-----
drivers/event/cnxk/cn9k_worker.c | 21 ++---
drivers/event/cnxk/cn9k_worker.h | 78 +++++++++----------
drivers/event/cnxk/cn9k_worker_deq.c | 4 +-
drivers/event/cnxk/cn9k_worker_deq_ca.c | 4 +-
drivers/event/cnxk/cn9k_worker_deq_tmo.c | 4 +-
drivers/event/cnxk/cn9k_worker_dual_deq.c | 16 ++--
drivers/event/cnxk/cn9k_worker_dual_deq_ca.c | 19 +++--
drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c | 26 +++----
drivers/event/cnxk/cnxk_eventdev.h | 25 +-----
10 files changed, 96 insertions(+), 129 deletions(-)
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 9886720310..a09722b717 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -27,17 +27,6 @@
[!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] \
[!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)])
-static void
-cn9k_init_hws_ops(struct cn9k_sso_hws_state *ws, uintptr_t base)
-{
- ws->tag_op = base + SSOW_LF_GWS_TAG;
- ws->wqp_op = base + SSOW_LF_GWS_WQP;
- ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK0;
- ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH;
- ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM;
- ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED;
-}
-
static int
cn9k_sso_hws_link(void *arg, void *port, uint16_t *map, uint16_t nb_link)
{
@@ -95,7 +84,7 @@ cn9k_sso_hws_setup(void *arg, void *hws, uintptr_t grp_base)
uint64_t val;
/* Set get_work tmo for HWS */
- val = NSEC2USEC(dev->deq_tmo_ns) - 1;
+ val = dev->deq_tmo_ns ? NSEC2USEC(dev->deq_tmo_ns) - 1 : 0;
if (dev->dual_ws) {
dws = hws;
dws->grp_base = grp_base;
@@ -148,7 +137,6 @@ cn9k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
{
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(arg);
struct cn9k_sso_hws_dual *dws;
- struct cn9k_sso_hws_state *st;
struct cn9k_sso_hws *ws;
uint64_t cq_ds_cnt = 1;
uint64_t aq_cnt = 1;
@@ -170,22 +158,21 @@ cn9k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
if (dev->dual_ws) {
dws = hws;
- st = &dws->ws_state[0];
ws_base = dws->base[0];
} else {
ws = hws;
- st = (struct cn9k_sso_hws_state *)ws;
ws_base = ws->base;
}
while (aq_cnt || cq_ds_cnt || ds_cnt) {
- plt_write64(req, st->getwrk_op);
- cn9k_sso_hws_get_work_empty(st, &ev);
+ plt_write64(req, ws_base + SSOW_LF_GWS_OP_GET_WORK0);
+ cn9k_sso_hws_get_work_empty(ws_base, &ev);
if (fn != NULL && ev.u64 != 0)
fn(arg, ev);
if (ev.sched_type != SSO_TT_EMPTY)
- cnxk_sso_hws_swtag_flush(st->tag_op,
- st->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(
+ ws_base + SSOW_LF_GWS_TAG,
+ ws_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
do {
val = plt_read64(ws_base + SSOW_LF_GWS_PENDSTATE);
} while (val & BIT_ULL(56));
@@ -674,8 +661,6 @@ cn9k_sso_init_hws_mem(void *arg, uint8_t port_id)
&dev->sso, CN9K_DUAL_WS_PAIR_ID(port_id, 0));
dws->base[1] = roc_sso_hws_base_get(
&dev->sso, CN9K_DUAL_WS_PAIR_ID(port_id, 1));
- cn9k_init_hws_ops(&dws->ws_state[0], dws->base[0]);
- cn9k_init_hws_ops(&dws->ws_state[1], dws->base[1]);
dws->hws_id = port_id;
dws->swtag_req = 0;
dws->vws = 0;
@@ -695,7 +680,6 @@ cn9k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = RTE_PTR_ADD(ws, sizeof(struct cnxk_sso_hws_cookie));
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
- cn9k_init_hws_ops((struct cn9k_sso_hws_state *)ws, ws->base);
ws->hws_id = port_id;
ws->swtag_req = 0;
diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c
index 32f7cc0343..a981bc986f 100644
--- a/drivers/event/cnxk/cn9k_worker.c
+++ b/drivers/event/cnxk/cn9k_worker.c
@@ -19,7 +19,8 @@ cn9k_sso_hws_enq(void *port, const struct rte_event *ev)
cn9k_sso_hws_forward_event(ws, ev);
break;
case RTE_EVENT_OP_RELEASE:
- cnxk_sso_hws_swtag_flush(ws->tag_op, ws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_TAG,
+ ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
break;
default:
return 0;
@@ -67,17 +68,18 @@ uint16_t __rte_hot
cn9k_sso_hws_dual_enq(void *port, const struct rte_event *ev)
{
struct cn9k_sso_hws_dual *dws = port;
- struct cn9k_sso_hws_state *vws;
+ uint64_t base;
- vws = &dws->ws_state[!dws->vws];
+ base = dws->base[!dws->vws];
switch (ev->op) {
case RTE_EVENT_OP_NEW:
return cn9k_sso_hws_dual_new_event(dws, ev);
case RTE_EVENT_OP_FORWARD:
- cn9k_sso_hws_dual_forward_event(dws, vws, ev);
+ cn9k_sso_hws_dual_forward_event(dws, base, ev);
break;
case RTE_EVENT_OP_RELEASE:
- cnxk_sso_hws_swtag_flush(vws->tag_op, vws->swtag_flush_op);
+ cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG,
+ base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
break;
default:
return 0;
@@ -114,7 +116,7 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[],
struct cn9k_sso_hws_dual *dws = port;
RTE_SET_USED(nb_events);
- cn9k_sso_hws_dual_forward_event(dws, &dws->ws_state[!dws->vws], ev);
+ cn9k_sso_hws_dual_forward_event(dws, dws->base[!dws->vws], ev);
return 1;
}
@@ -126,7 +128,8 @@ cn9k_sso_hws_ca_enq(void *port, struct rte_event ev[], uint16_t nb_events)
RTE_SET_USED(nb_events);
- return cn9k_cpt_crypto_adapter_enqueue(ws->tag_op, ev->event_ptr);
+ return cn9k_cpt_crypto_adapter_enqueue(ws->base + SSOW_LF_GWS_TAG,
+ ev->event_ptr);
}
uint16_t __rte_hot
@@ -136,6 +139,6 @@ cn9k_sso_hws_dual_ca_enq(void *port, struct rte_event ev[], uint16_t nb_events)
RTE_SET_USED(nb_events);
- return cn9k_cpt_crypto_adapter_enqueue(dws->ws_state[!dws->vws].tag_op,
- ev->event_ptr);
+ return cn9k_cpt_crypto_adapter_enqueue(
+ dws->base[!dws->vws] + SSOW_LF_GWS_TAG, ev->event_ptr);
}
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 320e39da7b..57114e83ad 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -37,12 +37,12 @@ cn9k_sso_hws_new_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
}
static __rte_always_inline void
-cn9k_sso_hws_fwd_swtag(struct cn9k_sso_hws_state *vws,
- const struct rte_event *ev)
+cn9k_sso_hws_fwd_swtag(uint64_t base, const struct rte_event *ev)
{
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- const uint8_t cur_tt = CNXK_TT_FROM_TAG(plt_read64(vws->tag_op));
+ const uint8_t cur_tt =
+ CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_TAG));
/* CNXK model
* cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
@@ -54,24 +54,24 @@ cn9k_sso_hws_fwd_swtag(struct cn9k_sso_hws_state *vws,
if (new_tt == SSO_TT_UNTAGGED) {
if (cur_tt != SSO_TT_UNTAGGED)
- cnxk_sso_hws_swtag_untag(
- CN9K_SSOW_GET_BASE_ADDR(vws->getwrk_op) +
- SSOW_LF_GWS_OP_SWTAG_UNTAG);
+ cnxk_sso_hws_swtag_untag(base +
+ SSOW_LF_GWS_OP_SWTAG_UNTAG);
} else {
- cnxk_sso_hws_swtag_norm(tag, new_tt, vws->swtag_norm_op);
+ cnxk_sso_hws_swtag_norm(tag, new_tt,
+ base + SSOW_LF_GWS_OP_SWTAG_NORM);
}
}
static __rte_always_inline void
-cn9k_sso_hws_fwd_group(struct cn9k_sso_hws_state *ws,
- const struct rte_event *ev, const uint16_t grp)
+cn9k_sso_hws_fwd_group(uint64_t base, const struct rte_event *ev,
+ const uint16_t grp)
{
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- plt_write64(ev->u64, CN9K_SSOW_GET_BASE_ADDR(ws->getwrk_op) +
- SSOW_LF_GWS_OP_UPD_WQP_GRP1);
- cnxk_sso_hws_swtag_desched(tag, new_tt, grp, ws->swtag_desched_op);
+ plt_write64(ev->u64, base + SSOW_LF_GWS_OP_UPD_WQP_GRP1);
+ cnxk_sso_hws_swtag_desched(tag, new_tt, grp,
+ base + SSOW_LF_GWS_OP_SWTAG_DESCHED);
}
static __rte_always_inline void
@@ -80,8 +80,8 @@ cn9k_sso_hws_forward_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
const uint8_t grp = ev->queue_id;
/* Group hasn't changed, Use SWTAG to forward the event */
- if (CNXK_GRP_FROM_TAG(plt_read64(ws->tag_op)) == grp) {
- cn9k_sso_hws_fwd_swtag((struct cn9k_sso_hws_state *)ws, ev);
+ if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_TAG)) == grp) {
+ cn9k_sso_hws_fwd_swtag(ws->base, ev);
ws->swtag_req = 1;
} else {
/*
@@ -89,8 +89,7 @@ cn9k_sso_hws_forward_event(struct cn9k_sso_hws *ws, const struct rte_event *ev)
* Use deschedule/add_work operation to transfer the event to
* new group/core
*/
- cn9k_sso_hws_fwd_group((struct cn9k_sso_hws_state *)ws, ev,
- grp);
+ cn9k_sso_hws_fwd_group(ws->base, ev, grp);
}
}
@@ -115,15 +114,14 @@ cn9k_sso_hws_dual_new_event(struct cn9k_sso_hws_dual *dws,
}
static __rte_always_inline void
-cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws,
- struct cn9k_sso_hws_state *vws,
+cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws, uint64_t base,
const struct rte_event *ev)
{
const uint8_t grp = ev->queue_id;
/* Group hasn't changed, Use SWTAG to forward the event */
- if (CNXK_GRP_FROM_TAG(plt_read64(vws->tag_op)) == grp) {
- cn9k_sso_hws_fwd_swtag(vws, ev);
+ if (CNXK_GRP_FROM_TAG(plt_read64(base + SSOW_LF_GWS_TAG)) == grp) {
+ cn9k_sso_hws_fwd_swtag(base, ev);
dws->swtag_req = 1;
} else {
/*
@@ -131,7 +129,7 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws,
* Use deschedule/add_work operation to transfer the event to
* new group/core
*/
- cn9k_sso_hws_fwd_group(vws, ev, grp);
+ cn9k_sso_hws_fwd_group(base, ev, grp);
}
}
@@ -149,8 +147,7 @@ cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
}
static __rte_always_inline uint16_t
-cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
- struct cn9k_sso_hws_state *ws_pair,
+cn9k_sso_hws_dual_get_work(uint64_t base, uint64_t pair_base,
struct rte_event *ev, const uint32_t flags,
const void *const lookup_mem,
struct cnxk_timesync_info *const tstamp)
@@ -177,14 +174,15 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
" prfm pldl1keep, [%[mbuf]] \n"
: [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
[mbuf] "=&r"(mbuf)
- : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op),
- [gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op));
+ : [tag_loc] "r"(base + SSOW_LF_GWS_TAG),
+ [wqp_loc] "r"(base + SSOW_LF_GWS_WQP), [gw] "r"(set_gw),
+ [pong] "r"(pair_base + SSOW_LF_GWS_OP_GET_WORK0));
#else
- gw.u64[0] = plt_read64(ws->tag_op);
+ gw.u64[0] = plt_read64(base + SSOW_LF_GWS_TAG);
while ((BIT_ULL(63)) & gw.u64[0])
- gw.u64[0] = plt_read64(ws->tag_op);
- gw.u64[1] = plt_read64(ws->wqp_op);
- plt_write64(set_gw, ws_pair->getwrk_op);
+ gw.u64[0] = plt_read64(base + SSOW_LF_GWS_TAG);
+ gw.u64[1] = plt_read64(base + SSOW_LF_GWS_WQP);
+ plt_write64(set_gw, pair_base + SSOW_LF_GWS_OP_GET_WORK0);
mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
@@ -236,7 +234,7 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev,
plt_write64(BIT_ULL(16) | /* wait for work. */
1, /* Use Mask set 0. */
- ws->getwrk_op);
+ ws->base + SSOW_LF_GWS_OP_GET_WORK0);
if (flags & NIX_RX_OFFLOAD_PTYPE_F)
rte_prefetch_non_temporal(lookup_mem);
@@ -255,13 +253,14 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev,
" prfm pldl1keep, [%[mbuf]] \n"
: [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
[mbuf] "=&r"(mbuf)
- : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
+ : [tag_loc] "r"(ws->base + SSOW_LF_GWS_TAG),
+ [wqp_loc] "r"(ws->base + SSOW_LF_GWS_WQP));
#else
- gw.u64[0] = plt_read64(ws->tag_op);
+ gw.u64[0] = plt_read64(ws->base + SSOW_LF_GWS_TAG);
while ((BIT_ULL(63)) & gw.u64[0])
- gw.u64[0] = plt_read64(ws->tag_op);
+ gw.u64[0] = plt_read64(ws->base + SSOW_LF_GWS_TAG);
- gw.u64[1] = plt_read64(ws->wqp_op);
+ gw.u64[1] = plt_read64(ws->base + SSOW_LF_GWS_WQP);
mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
@@ -303,7 +302,7 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev,
/* Used in cleaning up workslot. */
static __rte_always_inline uint16_t
-cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
+cn9k_sso_hws_get_work_empty(uint64_t base, struct rte_event *ev)
{
union {
__uint128_t get_work;
@@ -325,13 +324,14 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
" sub %[mbuf], %[wqp], #0x80 \n"
: [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
[mbuf] "=&r"(mbuf)
- : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
+ : [tag_loc] "r"(base + SSOW_LF_GWS_TAG),
+ [wqp_loc] "r"(base + SSOW_LF_GWS_WQP));
#else
- gw.u64[0] = plt_read64(ws->tag_op);
+ gw.u64[0] = plt_read64(base + SSOW_LF_GWS_TAG);
while ((BIT_ULL(63)) & gw.u64[0])
- gw.u64[0] = plt_read64(ws->tag_op);
+ gw.u64[0] = plt_read64(base + SSOW_LF_GWS_TAG);
- gw.u64[1] = plt_read64(ws->wqp_op);
+ gw.u64[1] = plt_read64(base + SSOW_LF_GWS_WQP);
mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
diff --git a/drivers/event/cnxk/cn9k_worker_deq.c b/drivers/event/cnxk/cn9k_worker_deq.c
index d65c72af7a..ba6fd05381 100644
--- a/drivers/event/cnxk/cn9k_worker_deq.c
+++ b/drivers/event/cnxk/cn9k_worker_deq.c
@@ -16,7 +16,7 @@
\
if (ws->swtag_req) { \
ws->swtag_req = 0; \
- cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_TAG); \
return 1; \
} \
\
@@ -32,7 +32,7 @@
\
if (ws->swtag_req) { \
ws->swtag_req = 0; \
- cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_TAG); \
return 1; \
} \
\
diff --git a/drivers/event/cnxk/cn9k_worker_deq_ca.c b/drivers/event/cnxk/cn9k_worker_deq_ca.c
index b5d0263559..ffe7a7c9e2 100644
--- a/drivers/event/cnxk/cn9k_worker_deq_ca.c
+++ b/drivers/event/cnxk/cn9k_worker_deq_ca.c
@@ -16,7 +16,7 @@
\
if (ws->swtag_req) { \
ws->swtag_req = 0; \
- cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_TAG); \
return 1; \
} \
\
@@ -42,7 +42,7 @@
\
if (ws->swtag_req) { \
ws->swtag_req = 0; \
- cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_TAG); \
return 1; \
} \
\
diff --git a/drivers/event/cnxk/cn9k_worker_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
index b41a590fb7..5147c1933a 100644
--- a/drivers/event/cnxk/cn9k_worker_deq_tmo.c
+++ b/drivers/event/cnxk/cn9k_worker_deq_tmo.c
@@ -16,7 +16,7 @@
\
if (ws->swtag_req) { \
ws->swtag_req = 0; \
- cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_TAG); \
return ret; \
} \
\
@@ -46,7 +46,7 @@
\
if (ws->swtag_req) { \
ws->swtag_req = 0; \
- cnxk_sso_hws_swtag_wait(ws->tag_op); \
+ cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_TAG); \
return ret; \
} \
\
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq.c b/drivers/event/cnxk/cn9k_worker_dual_deq.c
index 440b66edca..ed134ab779 100644
--- a/drivers/event/cnxk/cn9k_worker_dual_deq.c
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq.c
@@ -16,14 +16,14 @@
RTE_SET_USED(timeout_ticks); \
if (dws->swtag_req) { \
dws->swtag_req = 0; \
- cnxk_sso_hws_swtag_wait( \
- dws->ws_state[!dws->vws].tag_op); \
+ cnxk_sso_hws_swtag_wait(dws->base[!dws->vws] + \
+ SSOW_LF_GWS_TAG); \
return 1; \
} \
\
gw = cn9k_sso_hws_dual_get_work( \
- &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
- ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->base[dws->vws], dws->base[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
dws->vws = !dws->vws; \
return gw; \
} \
@@ -37,14 +37,14 @@
RTE_SET_USED(timeout_ticks); \
if (dws->swtag_req) { \
dws->swtag_req = 0; \
- cnxk_sso_hws_swtag_wait( \
- dws->ws_state[!dws->vws].tag_op); \
+ cnxk_sso_hws_swtag_wait(dws->base[!dws->vws] + \
+ SSOW_LF_GWS_TAG); \
return 1; \
} \
\
gw = cn9k_sso_hws_dual_get_work( \
- &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
- ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->base[dws->vws], dws->base[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
dws->vws = !dws->vws; \
return gw; \
}
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_ca.c b/drivers/event/cnxk/cn9k_worker_dual_deq_ca.c
index b66e2cfc08..22e148be73 100644
--- a/drivers/event/cnxk/cn9k_worker_dual_deq_ca.c
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_ca.c
@@ -16,15 +16,14 @@
RTE_SET_USED(timeout_ticks); \
if (dws->swtag_req) { \
dws->swtag_req = 0; \
- cnxk_sso_hws_swtag_wait( \
- dws->ws_state[!dws->vws].tag_op); \
+ cnxk_sso_hws_swtag_wait(dws->base[!dws->vws] + \
+ SSOW_LF_GWS_TAG); \
return 1; \
} \
\
- gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws], \
- &dws->ws_state[!dws->vws], ev, \
- flags | CPT_RX_WQE_F, \
- dws->lookup_mem, dws->tstamp); \
+ gw = cn9k_sso_hws_dual_get_work( \
+ dws->base[dws->vws], dws->base[!dws->vws], ev, \
+ flags | CPT_RX_WQE_F, dws->lookup_mem, dws->tstamp); \
dws->vws = !dws->vws; \
return gw; \
} \
@@ -48,14 +47,14 @@
RTE_SET_USED(timeout_ticks); \
if (dws->swtag_req) { \
dws->swtag_req = 0; \
- cnxk_sso_hws_swtag_wait( \
- dws->ws_state[!dws->vws].tag_op); \
+ cnxk_sso_hws_swtag_wait(dws->base[!dws->vws] + \
+ SSOW_LF_GWS_TAG); \
return 1; \
} \
\
gw = cn9k_sso_hws_dual_get_work( \
- &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
- ev, flags | NIX_RX_MULTI_SEG_F | CPT_RX_WQE_F, \
+ dws->base[dws->vws], dws->base[!dws->vws], ev, \
+ flags | NIX_RX_MULTI_SEG_F | CPT_RX_WQE_F, \
dws->lookup_mem, dws->tstamp); \
dws->vws = !dws->vws; \
return gw; \
diff --git a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
index 78a4b3d127..e5ba3feb22 100644
--- a/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
+++ b/drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c
@@ -16,20 +16,19 @@
\
if (dws->swtag_req) { \
dws->swtag_req = 0; \
- cnxk_sso_hws_swtag_wait( \
- dws->ws_state[!dws->vws].tag_op); \
+ cnxk_sso_hws_swtag_wait(dws->base[!dws->vws] + \
+ SSOW_LF_GWS_TAG); \
return ret; \
} \
\
ret = cn9k_sso_hws_dual_get_work( \
- &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
- ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->base[dws->vws], dws->base[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
dws->vws = !dws->vws; \
for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
ret = cn9k_sso_hws_dual_get_work( \
- &dws->ws_state[dws->vws], \
- &dws->ws_state[!dws->vws], ev, flags, \
- dws->lookup_mem, dws->tstamp); \
+ dws->base[dws->vws], dws->base[!dws->vws], ev, \
+ flags, dws->lookup_mem, dws->tstamp); \
dws->vws = !dws->vws; \
} \
\
@@ -55,20 +54,19 @@
\
if (dws->swtag_req) { \
dws->swtag_req = 0; \
- cnxk_sso_hws_swtag_wait( \
- dws->ws_state[!dws->vws].tag_op); \
+ cnxk_sso_hws_swtag_wait(dws->base[!dws->vws] + \
+ SSOW_LF_GWS_TAG); \
return ret; \
} \
\
ret = cn9k_sso_hws_dual_get_work( \
- &dws->ws_state[dws->vws], &dws->ws_state[!dws->vws], \
- ev, flags, dws->lookup_mem, dws->tstamp); \
+ dws->base[dws->vws], dws->base[!dws->vws], ev, flags, \
+ dws->lookup_mem, dws->tstamp); \
dws->vws = !dws->vws; \
for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) { \
ret = cn9k_sso_hws_dual_get_work( \
- &dws->ws_state[dws->vws], \
- &dws->ws_state[!dws->vws], ev, flags, \
- dws->lookup_mem, dws->tstamp); \
+ dws->base[dws->vws], dws->base[!dws->vws], ev, \
+ flags, dws->lookup_mem, dws->tstamp); \
dws->vws = !dws->vws; \
} \
\
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index ce9f965691..73ca308c64 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -136,19 +136,9 @@ struct cn10k_sso_hws {
uint8_t tx_adptr_data[];
} __rte_cache_aligned;
-/* CN9K HWS ops */
-#define CN9K_SSO_HWS_OPS \
- uintptr_t swtag_desched_op; \
- uintptr_t swtag_flush_op; \
- uintptr_t swtag_norm_op; \
- uintptr_t getwrk_op; \
- uintptr_t tag_op; \
- uintptr_t wqp_op
-
/* Event port a.k.a GWS */
struct cn9k_sso_hws {
- /* Get Work Fastpath data */
- CN9K_SSO_HWS_OPS;
+ uint64_t base;
/* PTP timestamp */
struct cnxk_timesync_info *tstamp;
void *lookup_mem;
@@ -159,17 +149,11 @@ struct cn9k_sso_hws {
uint64_t *fc_mem;
uintptr_t grp_base;
/* Tx Fastpath data */
- uint64_t base __rte_cache_aligned;
- uint8_t tx_adptr_data[];
+ uint8_t tx_adptr_data[] __rte_cache_aligned;
} __rte_cache_aligned;
-struct cn9k_sso_hws_state {
- CN9K_SSO_HWS_OPS;
-};
-
struct cn9k_sso_hws_dual {
- /* Get Work Fastpath data */
- struct cn9k_sso_hws_state ws_state[2]; /* Ping and Pong */
+ uint64_t base[2]; /* Ping and Pong */
/* PTP timestamp */
struct cnxk_timesync_info *tstamp;
void *lookup_mem;
@@ -181,8 +165,7 @@ struct cn9k_sso_hws_dual {
uint64_t *fc_mem;
uintptr_t grp_base;
/* Tx Fastpath data */
- uint64_t base[2] __rte_cache_aligned;
- uint8_t tx_adptr_data[];
+ uint8_t tx_adptr_data[] __rte_cache_aligned;
} __rte_cache_aligned;
struct cnxk_sso_hws_cookie {
--
2.33.0
* Re: [dpdk-dev] [PATCH 1/3] event/cnxk: fix packet Tx overflow
From: Jerin Jacob @ 2021-10-30 13:36 UTC
To: Pavan Nikhilesh; +Cc: Jerin Jacob, Shijith Thotton, dpdk-dev
On Mon, Oct 4, 2021 at 2:07 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> The transmit loop incorrectly assumes that nb_mbufs is always
> a multiple of 4 when transmitting an event vector. A vector may
> be pushed out before reaching its maximum size due to a timeout,
> in which case the loop reads past the end of the mbuf array.
>
> Fixes: 761a321acf91 ("event/cnxk: support vectorized Tx event fast path")
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Please rebase
[for-main]dell[dpdk-next-eventdev] $ git pw series apply 19356
Applying: event/cnxk: fix packet Tx overflow
Applying: event/cnxk: reduce workslot memory consumption
error: sha1 information is lacking or useless
(drivers/event/cnxk/cnxk_eventdev.c).
error: could not build fake ancestor
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0002 event/cnxk: reduce workslot memory consumption
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".
> [rest of patch snipped; identical to the patch above]