* [dpdk-dev] [PATCH] event/octeontx2: enhance Tx path cache locality
@ 2020-11-22 21:18 pbhagavatula
2021-01-12 8:39 ` [dpdk-dev] [PATCH v2] " pbhagavatula
2021-01-26 9:57 ` [dpdk-dev] [PATCH] " Jerin Jacob
0 siblings, 2 replies; 3+ messages in thread
From: pbhagavatula @ 2020-11-22 21:18 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Anoob Joseph, Nithin Dabilpuram, Kiran Kumar K
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Enhance Tx path cache locality; remove current tag type and group
stores from the datapath to conserve store buffers.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/event/octeontx2/otx2_evdev.c | 15 ++----
drivers/event/octeontx2/otx2_evdev.h | 24 ++++-----
drivers/event/octeontx2/otx2_worker.c | 42 +++++----------
drivers/event/octeontx2/otx2_worker.h | 32 +++++------
drivers/event/octeontx2/otx2_worker_dual.c | 63 ++++++++++------------
drivers/event/octeontx2/otx2_worker_dual.h | 2 -
drivers/net/octeontx2/otx2_ethdev_sec_tx.h | 9 ++--
7 files changed, 74 insertions(+), 113 deletions(-)
diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c
index 0fe014c24..14f16a68f 100644
--- a/drivers/event/octeontx2/otx2_evdev.c
+++ b/drivers/event/octeontx2/otx2_evdev.c
@@ -833,10 +833,12 @@ sso_configure_dual_ports(const struct rte_eventdev *event_dev)
ws->port = i;
base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[0], base);
+ ws->base[0] = base;
vws++;
base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[1], base);
+ ws->base[1] = base;
vws++;
gws_cookie = ssogws_get_cookie(ws);
@@ -909,6 +911,7 @@ sso_configure_ports(const struct rte_eventdev *event_dev)
ws->port = i;
base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | i << 12);
sso_set_port_ops(ws, base);
+ ws->base = base;
gws_cookie = ssogws_get_cookie(ws);
gws_cookie->event_dev = event_dev;
@@ -1447,20 +1450,12 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
ws = event_dev->data->ports[i];
ssogws_reset((struct otx2_ssogws *)&ws->ws_state[0]);
ssogws_reset((struct otx2_ssogws *)&ws->ws_state[1]);
- ws->swtag_req = 0;
ws->vws = 0;
- ws->ws_state[0].cur_grp = 0;
- ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY;
- ws->ws_state[1].cur_grp = 0;
- ws->ws_state[1].cur_tt = SSO_SYNC_EMPTY;
} else {
struct otx2_ssogws *ws;
ws = event_dev->data->ports[i];
ssogws_reset(ws);
- ws->swtag_req = 0;
- ws->cur_grp = 0;
- ws->cur_tt = SSO_SYNC_EMPTY;
}
}
@@ -1479,8 +1474,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
otx2_write64(enable, ws->grps_base[i] +
SSO_LF_GGRP_QCTL);
}
- ws->ws_state[0].cur_grp = 0;
- ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY;
} else {
struct otx2_ssogws *ws = event_dev->data->ports[0];
@@ -1492,8 +1485,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
otx2_write64(enable, ws->grps_base[i] +
SSO_LF_GGRP_QCTL);
}
- ws->cur_grp = 0;
- ws->cur_tt = SSO_SYNC_EMPTY;
}
/* reset SSO GWS cache */
diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h
index 0513cb81c..e381b9e52 100644
--- a/drivers/event/octeontx2/otx2_evdev.h
+++ b/drivers/event/octeontx2/otx2_evdev.h
@@ -80,6 +80,8 @@
#define OTX2_SSOW_GET_BASE_ADDR(_GW) ((_GW) - SSOW_LF_GWS_OP_GET_WORK)
#define OTX2_SSOW_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY)
+#define OTX2_SSOW_GRP_FROM_TAG(x) (((x) >> 36) & 0x3ff)
+#define OTX2_SSOW_SWTAG_PEND(x) ((x) & BIT_ULL(62))
#define NSEC2USEC(__ns) ((__ns) / 1E3)
#define USEC2NSEC(__us) ((__us) * 1E3)
@@ -169,25 +171,23 @@ struct otx2_sso_evdev {
uintptr_t wqp_op; \
uintptr_t swtag_flush_op; \
uintptr_t swtag_norm_op; \
- uintptr_t swtag_desched_op; \
- uint8_t cur_tt; \
- uint8_t cur_grp
+ uintptr_t swtag_desched_op;
/* Event port aka GWS */
struct otx2_ssogws {
/* Get Work Fastpath data */
OTX2_SSOGWS_OPS;
- uint8_t swtag_req;
+ /* PTP timestamp */
+ struct otx2_timesync_info *tstamp;
void *lookup_mem;
uint8_t port;
/* Add Work Fastpath data */
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
- /* PTP timestamp */
- struct otx2_timesync_info *tstamp;
/* Tx Fastpath data */
- uint8_t tx_adptr_data[] __rte_cache_aligned;
+ uint64_t base __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct otx2_ssogws_state {
@@ -197,18 +197,18 @@ struct otx2_ssogws_state {
struct otx2_ssogws_dual {
/* Get Work Fastpath data */
struct otx2_ssogws_state ws_state[2]; /* Ping and Pong */
- uint8_t swtag_req;
- uint8_t vws; /* Ping pong bit */
+ /* PTP timestamp */
+ struct otx2_timesync_info *tstamp;
void *lookup_mem;
+ uint8_t vws; /* Ping pong bit */
uint8_t port;
/* Add Work Fastpath data */
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
- /* PTP timestamp */
- struct otx2_timesync_info *tstamp;
/* Tx Fastpath data */
- uint8_t tx_adptr_data[] __rte_cache_aligned;
+ uint64_t base[2] __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
static inline struct otx2_sso_evdev *
diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c
index b098407e0..7ed836c1e 100644
--- a/drivers/event/octeontx2/otx2_worker.c
+++ b/drivers/event/octeontx2/otx2_worker.c
@@ -25,7 +25,7 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev)
{
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- const uint8_t cur_tt = ws->cur_tt;
+ const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op));
/* 96XX model
* cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED
@@ -41,8 +41,6 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev)
} else {
otx2_ssogws_swtag_norm(ws, tag, new_tt);
}
-
- ws->swtag_req = 1;
}
static __rte_always_inline void
@@ -64,7 +62,7 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev)
const uint8_t grp = ev->queue_id;
/* Group hasn't changed, Use SWTAG to forward the event */
- if (ws->cur_grp == grp)
+ if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(ws->tag_op)) == grp)
otx2_ssogws_fwd_swtag(ws, ev);
else
/*
@@ -75,12 +73,6 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev)
otx2_ssogws_fwd_group(ws, ev, grp);
}
-static __rte_always_inline void
-otx2_ssogws_release_event(struct otx2_ssogws *ws)
-{
- otx2_ssogws_swtag_flush(ws);
-}
-
#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \
uint16_t __rte_hot \
otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev, \
@@ -90,8 +82,7 @@ otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev, \
\
RTE_SET_USED(timeout_ticks); \
\
- if (ws->swtag_req) { \
- ws->swtag_req = 0; \
+ if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \
otx2_ssogws_swtag_wait(ws); \
return 1; \
} \
@@ -117,8 +108,7 @@ otx2_ssogws_deq_timeout_ ##name(void *port, struct rte_event *ev, \
uint16_t ret = 1; \
uint64_t iter; \
\
- if (ws->swtag_req) { \
- ws->swtag_req = 0; \
+ if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \
otx2_ssogws_swtag_wait(ws); \
return ret; \
} \
@@ -149,8 +139,7 @@ otx2_ssogws_deq_seg_ ##name(void *port, struct rte_event *ev, \
\
RTE_SET_USED(timeout_ticks); \
\
- if (ws->swtag_req) { \
- ws->swtag_req = 0; \
+ if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \
otx2_ssogws_swtag_wait(ws); \
return 1; \
} \
@@ -177,8 +166,7 @@ otx2_ssogws_deq_seg_timeout_ ##name(void *port, struct rte_event *ev, \
uint16_t ret = 1; \
uint64_t iter; \
\
- if (ws->swtag_req) { \
- ws->swtag_req = 0; \
+ if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \
otx2_ssogws_swtag_wait(ws); \
return ret; \
} \
@@ -221,7 +209,7 @@ otx2_ssogws_enq(void *port, const struct rte_event *ev)
otx2_ssogws_forward_event(ws, ev);
break;
case RTE_EVENT_OP_RELEASE:
- otx2_ssogws_release_event(ws);
+ otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op);
break;
default:
return 0;
@@ -274,14 +262,13 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[], \
{ \
struct otx2_ssogws *ws = port; \
uint64_t cmd[sz]; \
- int i; \
\
- for (i = 0; i < nb_events; i++) \
- otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \
+ RTE_SET_USED(nb_events); \
+ return otx2_ssogws_event_tx(ws->base, &ev[0], cmd, \
+ (const uint64_t \
(*)[RTE_MAX_QUEUES_PER_PORT]) \
&ws->tx_adptr_data, \
flags); \
- return nb_events; \
}
SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
#undef T
@@ -293,14 +280,13 @@ otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
{ \
uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \
struct otx2_ssogws *ws = port; \
- int i; \
\
- for (i = 0; i < nb_events; i++) \
- otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \
+ RTE_SET_USED(nb_events); \
+ return otx2_ssogws_event_tx(ws->base, &ev[0], cmd, \
+ (const uint64_t \
(*)[RTE_MAX_QUEUES_PER_PORT]) \
&ws->tx_adptr_data, \
(flags) | NIX_TX_MULTI_SEG_F); \
- return nb_events; \
}
SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
#undef T
@@ -335,7 +321,7 @@ ssogws_flush_events(struct otx2_ssogws *ws, uint8_t queue_id, uintptr_t base,
if (fn != NULL && ev.u64 != 0)
fn(arg, ev);
if (ev.sched_type != SSO_TT_EMPTY)
- otx2_ssogws_swtag_flush(ws);
+ otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op);
rte_mb();
aq_cnt = otx2_read64(base + SSO_LF_GGRP_AQ_CNT);
ds_cnt = otx2_read64(base + SSO_LF_GGRP_MISC_CNT);
diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
index 0a7d6671c..2b716c042 100644
--- a/drivers/event/octeontx2/otx2_worker.h
+++ b/drivers/event/octeontx2/otx2_worker.h
@@ -64,8 +64,6 @@ otx2_ssogws_get_work(struct otx2_ssogws *ws, struct rte_event *ev,
event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
(event.get_work0 & (0x3FFull << 36)) << 4 |
(event.get_work0 & 0xffffffff);
- ws->cur_tt = event.sched_type;
- ws->cur_grp = event.queue_id;
if (event.sched_type != SSO_TT_EMPTY) {
if ((flags & NIX_RX_OFFLOAD_SECURITY_F) &&
@@ -136,8 +134,6 @@ otx2_ssogws_get_work_empty(struct otx2_ssogws *ws, struct rte_event *ev,
event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
(event.get_work0 & (0x3FFull << 36)) << 4 |
(event.get_work0 & 0xffffffff);
- ws->cur_tt = event.sched_type;
- ws->cur_grp = event.queue_id;
if (event.sched_type != SSO_TT_EMPTY &&
event.event_type == RTE_EVENT_TYPE_ETHDEV) {
@@ -192,18 +188,14 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws)
{
otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) +
SSOW_LF_GWS_OP_SWTAG_UNTAG);
- ws->cur_tt = SSO_SYNC_UNTAGGED;
}
static __rte_always_inline void
-otx2_ssogws_swtag_flush(struct otx2_ssogws *ws)
+otx2_ssogws_swtag_flush(uint64_t tag_op, uint64_t flush_op)
{
- if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) {
- ws->cur_tt = SSO_SYNC_EMPTY;
+ if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(tag_op)) == SSO_TT_EMPTY)
return;
- }
- otx2_write64(0, ws->swtag_flush_op);
- ws->cur_tt = SSO_SYNC_EMPTY;
+ otx2_write64(0, flush_op);
}
static __rte_always_inline void
@@ -236,7 +228,7 @@ otx2_ssogws_swtag_wait(struct otx2_ssogws *ws)
}
static __rte_always_inline void
-otx2_ssogws_head_wait(struct otx2_ssogws *ws)
+otx2_ssogws_head_wait(uint64_t tag_op)
{
#ifdef RTE_ARCH_ARM64
uint64_t tag;
@@ -250,11 +242,11 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws)
" tbz %[tag], 35, rty%= \n"
"done%=: \n"
: [tag] "=&r" (tag)
- : [tag_op] "r" (ws->tag_op)
+ : [tag_op] "r" (tag_op)
);
#else
/* Wait for the HEAD to be set */
- while (!(otx2_read64(ws->tag_op) & BIT_ULL(35)))
+ while (!(otx2_read64(tag_op) & BIT_ULL(35)))
;
#endif
}
@@ -276,8 +268,7 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m,
}
static __rte_always_inline uint16_t
-otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
- uint64_t *cmd,
+otx2_ssogws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
const uint32_t flags)
{
@@ -288,7 +279,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
if ((flags & NIX_TX_OFFLOAD_SECURITY_F) &&
(m->ol_flags & PKT_TX_SEC_OFFLOAD)) {
txq = otx2_ssogws_xtract_meta(m, txq_data);
- return otx2_sec_event_tx(ws, ev, m, txq, flags);
+ return otx2_sec_event_tx(base, ev, m, txq, flags);
}
/* Perform header writes before barrier for TSO */
@@ -309,7 +300,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
m->ol_flags, segdw, flags);
if (!ev->sched_type) {
otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
- otx2_ssogws_head_wait(ws);
+ otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr,
txq->io_addr, segdw);
@@ -324,7 +315,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
if (!ev->sched_type) {
otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
- otx2_ssogws_head_wait(ws);
+ otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
otx2_nix_xmit_one(cmd, txq->lmt_addr,
txq->io_addr, flags);
@@ -339,7 +330,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
return 1;
}
- otx2_ssogws_swtag_flush(ws);
+ otx2_ssogws_swtag_flush(base + SSOW_LF_GWS_TAG,
+ base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
return 1;
}
diff --git a/drivers/event/octeontx2/otx2_worker_dual.c b/drivers/event/octeontx2/otx2_worker_dual.c
index 946488eab..820455788 100644
--- a/drivers/event/octeontx2/otx2_worker_dual.c
+++ b/drivers/event/octeontx2/otx2_worker_dual.c
@@ -26,9 +26,9 @@ static __rte_always_inline void
otx2_ssogws_dual_fwd_swtag(struct otx2_ssogws_state *ws,
const struct rte_event *ev)
{
+ const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op));
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- const uint8_t cur_tt = ws->cur_tt;
/* 96XX model
* cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED
@@ -59,22 +59,20 @@ otx2_ssogws_dual_fwd_group(struct otx2_ssogws_state *ws,
}
static __rte_always_inline void
-otx2_ssogws_dual_forward_event(struct otx2_ssogws_dual *ws,
- struct otx2_ssogws_state *vws,
+otx2_ssogws_dual_forward_event(struct otx2_ssogws_state *vws,
const struct rte_event *ev)
{
const uint8_t grp = ev->queue_id;
/* Group hasn't changed, Use SWTAG to forward the event */
- if (vws->cur_grp == grp) {
+ if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(vws->tag_op)) == grp) {
otx2_ssogws_dual_fwd_swtag(vws, ev);
- ws->swtag_req = 1;
} else {
- /*
- * Group has been changed for group based work pipelining,
- * Use deschedule/add_work operation to transfer the event to
- * new group/core
- */
+ /*
+ * Group has been changed for group based work pipelining,
+ * Use deschedule/add_work operation to transfer the event to
+ * new group/core
+ */
otx2_ssogws_dual_fwd_group(vws, ev, grp);
}
}
@@ -90,10 +88,10 @@ otx2_ssogws_dual_enq(void *port, const struct rte_event *ev)
rte_smp_mb();
return otx2_ssogws_dual_new_event(ws, ev);
case RTE_EVENT_OP_FORWARD:
- otx2_ssogws_dual_forward_event(ws, vws, ev);
+ otx2_ssogws_dual_forward_event(vws, ev);
break;
case RTE_EVENT_OP_RELEASE:
- otx2_ssogws_swtag_flush((struct otx2_ssogws *)vws);
+ otx2_ssogws_swtag_flush(vws->tag_op, vws->swtag_flush_op);
break;
default:
return 0;
@@ -135,7 +133,7 @@ otx2_ssogws_dual_enq_fwd_burst(void *port, const struct rte_event ev[],
struct otx2_ssogws_state *vws = &ws->ws_state[!ws->vws];
RTE_SET_USED(nb_events);
- otx2_ssogws_dual_forward_event(ws, vws, ev);
+ otx2_ssogws_dual_forward_event(vws, ev);
return 1;
}
@@ -150,10 +148,10 @@ otx2_ssogws_dual_deq_ ##name(void *port, struct rte_event *ev, \
\
rte_prefetch_non_temporal(ws); \
RTE_SET_USED(timeout_ticks); \
- if (ws->swtag_req) { \
+ if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \
+ ws->ws_state[!ws->vws].tag_op))) { \
otx2_ssogws_swtag_wait((struct otx2_ssogws *) \
&ws->ws_state[!ws->vws]); \
- ws->swtag_req = 0; \
return 1; \
} \
\
@@ -184,10 +182,10 @@ otx2_ssogws_dual_deq_timeout_ ##name(void *port, struct rte_event *ev, \
uint64_t iter; \
uint8_t gw; \
\
- if (ws->swtag_req) { \
+ if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \
+ ws->ws_state[!ws->vws].tag_op))) { \
otx2_ssogws_swtag_wait((struct otx2_ssogws *) \
&ws->ws_state[!ws->vws]); \
- ws->swtag_req = 0; \
return 1; \
} \
\
@@ -228,10 +226,10 @@ otx2_ssogws_dual_deq_seg_ ##name(void *port, struct rte_event *ev, \
uint8_t gw; \
\
RTE_SET_USED(timeout_ticks); \
- if (ws->swtag_req) { \
+ if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \
+ ws->ws_state[!ws->vws].tag_op))) { \
otx2_ssogws_swtag_wait((struct otx2_ssogws *) \
&ws->ws_state[!ws->vws]); \
- ws->swtag_req = 0; \
return 1; \
} \
\
@@ -266,10 +264,10 @@ otx2_ssogws_dual_deq_seg_timeout_ ##name(void *port, \
uint64_t iter; \
uint8_t gw; \
\
- if (ws->swtag_req) { \
+ if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \
+ ws->ws_state[!ws->vws].tag_op))) { \
otx2_ssogws_swtag_wait((struct otx2_ssogws *) \
&ws->ws_state[!ws->vws]); \
- ws->swtag_req = 0; \
return 1; \
} \
\
@@ -314,15 +312,13 @@ otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port, \
uint16_t nb_events) \
{ \
struct otx2_ssogws_dual *ws = port; \
- struct otx2_ssogws *vws = \
- (struct otx2_ssogws *)&ws->ws_state[!ws->vws]; \
uint64_t cmd[sz]; \
\
RTE_SET_USED(nb_events); \
- return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t \
- (*)[RTE_MAX_QUEUES_PER_PORT]) \
- ws->tx_adptr_data, \
- flags); \
+ return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0], \
+ cmd, (const uint64_t \
+ (*)[RTE_MAX_QUEUES_PER_PORT]) \
+ &ws->tx_adptr_data, flags); \
}
SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
#undef T
@@ -333,16 +329,15 @@ otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port, \
struct rte_event ev[], \
uint16_t nb_events) \
{ \
- struct otx2_ssogws_dual *ws = port; \
- struct otx2_ssogws *vws = \
- (struct otx2_ssogws *)&ws->ws_state[!ws->vws]; \
uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct otx2_ssogws_dual *ws = port; \
\
RTE_SET_USED(nb_events); \
- return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t \
- (*)[RTE_MAX_QUEUES_PER_PORT]) \
- ws->tx_adptr_data, \
- (flags) | NIX_TX_MULTI_SEG_F); \
+ return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0], \
+ cmd, (const uint64_t \
+ (*)[RTE_MAX_QUEUES_PER_PORT]) \
+ &ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F);\
}
SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
#undef T
diff --git a/drivers/event/octeontx2/otx2_worker_dual.h b/drivers/event/octeontx2/otx2_worker_dual.h
index 6e6061821..72b616439 100644
--- a/drivers/event/octeontx2/otx2_worker_dual.h
+++ b/drivers/event/octeontx2/otx2_worker_dual.h
@@ -61,8 +61,6 @@ otx2_ssogws_dual_get_work(struct otx2_ssogws_state *ws,
event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
(event.get_work0 & (0x3FFull << 36)) << 4 |
(event.get_work0 & 0xffffffff);
- ws->cur_tt = event.sched_type;
- ws->cur_grp = event.queue_id;
if (event.sched_type != SSO_TT_EMPTY) {
if ((flags & NIX_RX_OFFLOAD_SECURITY_F) &&
diff --git a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
index 284bcd536..c8eae3d62 100644
--- a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
+++ b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
@@ -30,12 +30,11 @@ otx2_ipsec_fp_out_rlen_get(struct otx2_sec_session_ipsec_ip *sess,
}
static __rte_always_inline void
-otx2_ssogws_head_wait(struct otx2_ssogws *ws);
+otx2_ssogws_head_wait(uint64_t base);
static __rte_always_inline int
-otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
- struct rte_mbuf *m, const struct otx2_eth_txq *txq,
- const uint32_t offload_flags)
+otx2_sec_event_tx(uint64_t base, struct rte_event *ev, struct rte_mbuf *m,
+ const struct otx2_eth_txq *txq, const uint32_t offload_flags)
{
uint32_t dlen, rlen, desc_headroom, extend_head, extend_tail;
struct otx2_sec_session_ipsec_ip *sess;
@@ -149,7 +148,7 @@ otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
if (!ev->sched_type)
- otx2_ssogws_head_wait(ws);
+ otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
inst.param1 = sess->esn_hi >> 16;
inst.param2 = sess->esn_hi & 0xffff;
--
2.17.1
^ permalink raw reply [flat|nested] 3+ messages in thread
* [dpdk-dev] [PATCH v2] event/octeontx2: enhance Tx path cache locality
2020-11-22 21:18 [dpdk-dev] [PATCH] event/octeontx2: enhance Tx path cache locality pbhagavatula
@ 2021-01-12 8:39 ` pbhagavatula
2021-01-26 9:57 ` [dpdk-dev] [PATCH] " Jerin Jacob
1 sibling, 0 replies; 3+ messages in thread
From: pbhagavatula @ 2021-01-12 8:39 UTC (permalink / raw)
To: jerinj, Pavan Nikhilesh, Anoob Joseph, Nithin Dabilpuram, Kiran Kumar K
Cc: dev
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Enhance Tx path cache locality; remove current tag type and group
stores from the datapath to conserve store buffers.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
- SWTAG can complete asynchronously, so we need to maintain swtag_req in the ws
data structure.
drivers/event/octeontx2/otx2_evdev.c | 13 ++------
drivers/event/octeontx2/otx2_evdev.h | 23 +++++++------
drivers/event/octeontx2/otx2_worker.c | 28 ++++++----------
drivers/event/octeontx2/otx2_worker.h | 32 +++++++-----------
drivers/event/octeontx2/otx2_worker_dual.c | 39 ++++++++++------------
drivers/event/octeontx2/otx2_worker_dual.h | 2 --
drivers/net/octeontx2/otx2_ethdev_sec_tx.h | 9 +++--
7 files changed, 59 insertions(+), 87 deletions(-)
diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c
index 0fe014c24..80a786f21 100644
--- a/drivers/event/octeontx2/otx2_evdev.c
+++ b/drivers/event/octeontx2/otx2_evdev.c
@@ -833,10 +833,12 @@ sso_configure_dual_ports(const struct rte_eventdev *event_dev)
ws->port = i;
base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[0], base);
+ ws->base[0] = base;
vws++;
base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[1], base);
+ ws->base[1] = base;
vws++;
gws_cookie = ssogws_get_cookie(ws);
@@ -909,6 +911,7 @@ sso_configure_ports(const struct rte_eventdev *event_dev)
ws->port = i;
base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | i << 12);
sso_set_port_ops(ws, base);
+ ws->base = base;
gws_cookie = ssogws_get_cookie(ws);
gws_cookie->event_dev = event_dev;
@@ -1449,18 +1452,12 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
ssogws_reset((struct otx2_ssogws *)&ws->ws_state[1]);
ws->swtag_req = 0;
ws->vws = 0;
- ws->ws_state[0].cur_grp = 0;
- ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY;
- ws->ws_state[1].cur_grp = 0;
- ws->ws_state[1].cur_tt = SSO_SYNC_EMPTY;
} else {
struct otx2_ssogws *ws;
ws = event_dev->data->ports[i];
ssogws_reset(ws);
ws->swtag_req = 0;
- ws->cur_grp = 0;
- ws->cur_tt = SSO_SYNC_EMPTY;
}
}
@@ -1479,8 +1476,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
otx2_write64(enable, ws->grps_base[i] +
SSO_LF_GGRP_QCTL);
}
- ws->ws_state[0].cur_grp = 0;
- ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY;
} else {
struct otx2_ssogws *ws = event_dev->data->ports[0];
@@ -1492,8 +1487,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
otx2_write64(enable, ws->grps_base[i] +
SSO_LF_GGRP_QCTL);
}
- ws->cur_grp = 0;
- ws->cur_tt = SSO_SYNC_EMPTY;
}
/* reset SSO GWS cache */
diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h
index 0513cb81c..ed9cbc86b 100644
--- a/drivers/event/octeontx2/otx2_evdev.h
+++ b/drivers/event/octeontx2/otx2_evdev.h
@@ -80,6 +80,7 @@
#define OTX2_SSOW_GET_BASE_ADDR(_GW) ((_GW) - SSOW_LF_GWS_OP_GET_WORK)
#define OTX2_SSOW_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY)
+#define OTX2_SSOW_GRP_FROM_TAG(x) (((x) >> 36) & 0x3ff)
#define NSEC2USEC(__ns) ((__ns) / 1E3)
#define USEC2NSEC(__us) ((__us) * 1E3)
@@ -169,25 +170,24 @@ struct otx2_sso_evdev {
uintptr_t wqp_op; \
uintptr_t swtag_flush_op; \
uintptr_t swtag_norm_op; \
- uintptr_t swtag_desched_op; \
- uint8_t cur_tt; \
- uint8_t cur_grp
+ uintptr_t swtag_desched_op;
/* Event port aka GWS */
struct otx2_ssogws {
/* Get Work Fastpath data */
OTX2_SSOGWS_OPS;
- uint8_t swtag_req;
+ /* PTP timestamp */
+ struct otx2_timesync_info *tstamp;
void *lookup_mem;
+ uint8_t swtag_req;
uint8_t port;
/* Add Work Fastpath data */
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
- /* PTP timestamp */
- struct otx2_timesync_info *tstamp;
/* Tx Fastpath data */
- uint8_t tx_adptr_data[] __rte_cache_aligned;
+ uint64_t base __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
struct otx2_ssogws_state {
@@ -197,18 +197,19 @@ struct otx2_ssogws_state {
struct otx2_ssogws_dual {
/* Get Work Fastpath data */
struct otx2_ssogws_state ws_state[2]; /* Ping and Pong */
+ /* PTP timestamp */
+ struct otx2_timesync_info *tstamp;
+ void *lookup_mem;
uint8_t swtag_req;
uint8_t vws; /* Ping pong bit */
- void *lookup_mem;
uint8_t port;
/* Add Work Fastpath data */
uint64_t xaq_lmt __rte_cache_aligned;
uint64_t *fc_mem;
uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
- /* PTP timestamp */
- struct otx2_timesync_info *tstamp;
/* Tx Fastpath data */
- uint8_t tx_adptr_data[] __rte_cache_aligned;
+ uint64_t base[2] __rte_cache_aligned;
+ uint8_t tx_adptr_data[];
} __rte_cache_aligned;
static inline struct otx2_sso_evdev *
diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c
index b098407e0..95139d27a 100644
--- a/drivers/event/octeontx2/otx2_worker.c
+++ b/drivers/event/octeontx2/otx2_worker.c
@@ -25,7 +25,7 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev)
{
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- const uint8_t cur_tt = ws->cur_tt;
+ const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op));
/* 96XX model
* cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED
@@ -64,7 +64,7 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev)
const uint8_t grp = ev->queue_id;
/* Group hasn't changed, Use SWTAG to forward the event */
- if (ws->cur_grp == grp)
+ if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(ws->tag_op)) == grp)
otx2_ssogws_fwd_swtag(ws, ev);
else
/*
@@ -75,12 +75,6 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev)
otx2_ssogws_fwd_group(ws, ev, grp);
}
-static __rte_always_inline void
-otx2_ssogws_release_event(struct otx2_ssogws *ws)
-{
- otx2_ssogws_swtag_flush(ws);
-}
-
#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \
uint16_t __rte_hot \
otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev, \
@@ -221,7 +215,7 @@ otx2_ssogws_enq(void *port, const struct rte_event *ev)
otx2_ssogws_forward_event(ws, ev);
break;
case RTE_EVENT_OP_RELEASE:
- otx2_ssogws_release_event(ws);
+ otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op);
break;
default:
return 0;
@@ -274,14 +268,13 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[], \
{ \
struct otx2_ssogws *ws = port; \
uint64_t cmd[sz]; \
- int i; \
\
- for (i = 0; i < nb_events; i++) \
- otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \
+ RTE_SET_USED(nb_events); \
+ return otx2_ssogws_event_tx(ws->base, &ev[0], cmd, \
+ (const uint64_t \
(*)[RTE_MAX_QUEUES_PER_PORT]) \
&ws->tx_adptr_data, \
flags); \
- return nb_events; \
}
SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
#undef T
@@ -293,14 +286,13 @@ otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
{ \
uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \
struct otx2_ssogws *ws = port; \
- int i; \
\
- for (i = 0; i < nb_events; i++) \
- otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \
+ RTE_SET_USED(nb_events); \
+ return otx2_ssogws_event_tx(ws->base, &ev[0], cmd, \
+ (const uint64_t \
(*)[RTE_MAX_QUEUES_PER_PORT]) \
&ws->tx_adptr_data, \
(flags) | NIX_TX_MULTI_SEG_F); \
- return nb_events; \
}
SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
#undef T
@@ -335,7 +327,7 @@ ssogws_flush_events(struct otx2_ssogws *ws, uint8_t queue_id, uintptr_t base,
if (fn != NULL && ev.u64 != 0)
fn(arg, ev);
if (ev.sched_type != SSO_TT_EMPTY)
- otx2_ssogws_swtag_flush(ws);
+ otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op);
rte_mb();
aq_cnt = otx2_read64(base + SSO_LF_GGRP_AQ_CNT);
ds_cnt = otx2_read64(base + SSO_LF_GGRP_MISC_CNT);
diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
index 0a7d6671c..2b716c042 100644
--- a/drivers/event/octeontx2/otx2_worker.h
+++ b/drivers/event/octeontx2/otx2_worker.h
@@ -64,8 +64,6 @@ otx2_ssogws_get_work(struct otx2_ssogws *ws, struct rte_event *ev,
event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
(event.get_work0 & (0x3FFull << 36)) << 4 |
(event.get_work0 & 0xffffffff);
- ws->cur_tt = event.sched_type;
- ws->cur_grp = event.queue_id;
if (event.sched_type != SSO_TT_EMPTY) {
if ((flags & NIX_RX_OFFLOAD_SECURITY_F) &&
@@ -136,8 +134,6 @@ otx2_ssogws_get_work_empty(struct otx2_ssogws *ws, struct rte_event *ev,
event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
(event.get_work0 & (0x3FFull << 36)) << 4 |
(event.get_work0 & 0xffffffff);
- ws->cur_tt = event.sched_type;
- ws->cur_grp = event.queue_id;
if (event.sched_type != SSO_TT_EMPTY &&
event.event_type == RTE_EVENT_TYPE_ETHDEV) {
@@ -192,18 +188,14 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws)
{
otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) +
SSOW_LF_GWS_OP_SWTAG_UNTAG);
- ws->cur_tt = SSO_SYNC_UNTAGGED;
}
static __rte_always_inline void
-otx2_ssogws_swtag_flush(struct otx2_ssogws *ws)
+otx2_ssogws_swtag_flush(uint64_t tag_op, uint64_t flush_op)
{
- if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) {
- ws->cur_tt = SSO_SYNC_EMPTY;
+ if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(tag_op)) == SSO_TT_EMPTY)
return;
- }
- otx2_write64(0, ws->swtag_flush_op);
- ws->cur_tt = SSO_SYNC_EMPTY;
+ otx2_write64(0, flush_op);
}
static __rte_always_inline void
@@ -236,7 +228,7 @@ otx2_ssogws_swtag_wait(struct otx2_ssogws *ws)
}
static __rte_always_inline void
-otx2_ssogws_head_wait(struct otx2_ssogws *ws)
+otx2_ssogws_head_wait(uint64_t tag_op)
{
#ifdef RTE_ARCH_ARM64
uint64_t tag;
@@ -250,11 +242,11 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws)
" tbz %[tag], 35, rty%= \n"
"done%=: \n"
: [tag] "=&r" (tag)
- : [tag_op] "r" (ws->tag_op)
+ : [tag_op] "r" (tag_op)
);
#else
/* Wait for the HEAD to be set */
- while (!(otx2_read64(ws->tag_op) & BIT_ULL(35)))
+ while (!(otx2_read64(tag_op) & BIT_ULL(35)))
;
#endif
}
@@ -276,8 +268,7 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m,
}
static __rte_always_inline uint16_t
-otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
- uint64_t *cmd,
+otx2_ssogws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
const uint32_t flags)
{
@@ -288,7 +279,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
if ((flags & NIX_TX_OFFLOAD_SECURITY_F) &&
(m->ol_flags & PKT_TX_SEC_OFFLOAD)) {
txq = otx2_ssogws_xtract_meta(m, txq_data);
- return otx2_sec_event_tx(ws, ev, m, txq, flags);
+ return otx2_sec_event_tx(base, ev, m, txq, flags);
}
/* Perform header writes before barrier for TSO */
@@ -309,7 +300,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
m->ol_flags, segdw, flags);
if (!ev->sched_type) {
otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
- otx2_ssogws_head_wait(ws);
+ otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr,
txq->io_addr, segdw);
@@ -324,7 +315,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
if (!ev->sched_type) {
otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
- otx2_ssogws_head_wait(ws);
+ otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
otx2_nix_xmit_one(cmd, txq->lmt_addr,
txq->io_addr, flags);
@@ -339,7 +330,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
return 1;
}
- otx2_ssogws_swtag_flush(ws);
+ otx2_ssogws_swtag_flush(base + SSOW_LF_GWS_TAG,
+ base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
return 1;
}
diff --git a/drivers/event/octeontx2/otx2_worker_dual.c b/drivers/event/octeontx2/otx2_worker_dual.c
index 946488eab..81af4ca90 100644
--- a/drivers/event/octeontx2/otx2_worker_dual.c
+++ b/drivers/event/octeontx2/otx2_worker_dual.c
@@ -26,9 +26,9 @@ static __rte_always_inline void
otx2_ssogws_dual_fwd_swtag(struct otx2_ssogws_state *ws,
const struct rte_event *ev)
{
+ const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op));
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- const uint8_t cur_tt = ws->cur_tt;
/* 96XX model
* cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED
@@ -66,15 +66,15 @@ otx2_ssogws_dual_forward_event(struct otx2_ssogws_dual *ws,
const uint8_t grp = ev->queue_id;
/* Group hasn't changed, Use SWTAG to forward the event */
- if (vws->cur_grp == grp) {
+ if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(vws->tag_op)) == grp) {
otx2_ssogws_dual_fwd_swtag(vws, ev);
ws->swtag_req = 1;
} else {
- /*
- * Group has been changed for group based work pipelining,
- * Use deschedule/add_work operation to transfer the event to
- * new group/core
- */
+ /*
+ * Group has been changed for group based work pipelining,
+ * Use deschedule/add_work operation to transfer the event to
+ * new group/core
+ */
otx2_ssogws_dual_fwd_group(vws, ev, grp);
}
}
@@ -93,7 +93,7 @@ otx2_ssogws_dual_enq(void *port, const struct rte_event *ev)
otx2_ssogws_dual_forward_event(ws, vws, ev);
break;
case RTE_EVENT_OP_RELEASE:
- otx2_ssogws_swtag_flush((struct otx2_ssogws *)vws);
+ otx2_ssogws_swtag_flush(vws->tag_op, vws->swtag_flush_op);
break;
default:
return 0;
@@ -314,15 +314,13 @@ otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port, \
uint16_t nb_events) \
{ \
struct otx2_ssogws_dual *ws = port; \
- struct otx2_ssogws *vws = \
- (struct otx2_ssogws *)&ws->ws_state[!ws->vws]; \
uint64_t cmd[sz]; \
\
RTE_SET_USED(nb_events); \
- return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t \
- (*)[RTE_MAX_QUEUES_PER_PORT]) \
- ws->tx_adptr_data, \
- flags); \
+ return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0], \
+ cmd, (const uint64_t \
+ (*)[RTE_MAX_QUEUES_PER_PORT]) \
+ &ws->tx_adptr_data, flags); \
}
SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
#undef T
@@ -333,16 +331,15 @@ otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port, \
struct rte_event ev[], \
uint16_t nb_events) \
{ \
- struct otx2_ssogws_dual *ws = port; \
- struct otx2_ssogws *vws = \
- (struct otx2_ssogws *)&ws->ws_state[!ws->vws]; \
uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \
+ struct otx2_ssogws_dual *ws = port; \
\
RTE_SET_USED(nb_events); \
- return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t \
- (*)[RTE_MAX_QUEUES_PER_PORT]) \
- ws->tx_adptr_data, \
- (flags) | NIX_TX_MULTI_SEG_F); \
+ return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0], \
+ cmd, (const uint64_t \
+ (*)[RTE_MAX_QUEUES_PER_PORT]) \
+ &ws->tx_adptr_data, \
+ (flags) | NIX_TX_MULTI_SEG_F);\
}
SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
#undef T
diff --git a/drivers/event/octeontx2/otx2_worker_dual.h b/drivers/event/octeontx2/otx2_worker_dual.h
index 6e6061821..72b616439 100644
--- a/drivers/event/octeontx2/otx2_worker_dual.h
+++ b/drivers/event/octeontx2/otx2_worker_dual.h
@@ -61,8 +61,6 @@ otx2_ssogws_dual_get_work(struct otx2_ssogws_state *ws,
event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
(event.get_work0 & (0x3FFull << 36)) << 4 |
(event.get_work0 & 0xffffffff);
- ws->cur_tt = event.sched_type;
- ws->cur_grp = event.queue_id;
if (event.sched_type != SSO_TT_EMPTY) {
if ((flags & NIX_RX_OFFLOAD_SECURITY_F) &&
diff --git a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
index 284bcd536..c8eae3d62 100644
--- a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
+++ b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
@@ -30,12 +30,11 @@ otx2_ipsec_fp_out_rlen_get(struct otx2_sec_session_ipsec_ip *sess,
}
static __rte_always_inline void
-otx2_ssogws_head_wait(struct otx2_ssogws *ws);
+otx2_ssogws_head_wait(uint64_t base);
static __rte_always_inline int
-otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
- struct rte_mbuf *m, const struct otx2_eth_txq *txq,
- const uint32_t offload_flags)
+otx2_sec_event_tx(uint64_t base, struct rte_event *ev, struct rte_mbuf *m,
+ const struct otx2_eth_txq *txq, const uint32_t offload_flags)
{
uint32_t dlen, rlen, desc_headroom, extend_head, extend_tail;
struct otx2_sec_session_ipsec_ip *sess;
@@ -149,7 +148,7 @@ otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
if (!ev->sched_type)
- otx2_ssogws_head_wait(ws);
+ otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
inst.param1 = sess->esn_hi >> 16;
inst.param2 = sess->esn_hi & 0xffff;
--
2.17.1
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [dpdk-dev] [PATCH] event/octeontx2: enhance Tx path cache locality
2020-11-22 21:18 [dpdk-dev] [PATCH] event/octeontx2: enhance Tx path cache locality pbhagavatula
2021-01-12 8:39 ` [dpdk-dev] [PATCH v2] " pbhagavatula
@ 2021-01-26 9:57 ` Jerin Jacob
1 sibling, 0 replies; 3+ messages in thread
From: Jerin Jacob @ 2021-01-26 9:57 UTC (permalink / raw)
To: Pavan Nikhilesh
Cc: Jerin Jacob, Anoob Joseph, Nithin Dabilpuram, Kiran Kumar K, dpdk-dev
On Mon, Nov 23, 2020 at 2:48 AM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Enhance Tx path cache locality, remove current tag type and group
> stores from datapath to conserve store buffers.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Applied to dpdk-next-net-eventdev/for-main. Thanks
> drivers/event/octeontx2/otx2_evdev.c | 15 ++----
> drivers/event/octeontx2/otx2_evdev.h | 24 ++++-----
> drivers/event/octeontx2/otx2_worker.c | 42 +++++----------
> drivers/event/octeontx2/otx2_worker.h | 32 +++++------
> drivers/event/octeontx2/otx2_worker_dual.c | 63 ++++++++++------------
> drivers/event/octeontx2/otx2_worker_dual.h | 2 -
> drivers/net/octeontx2/otx2_ethdev_sec_tx.h | 9 ++--
> 7 files changed, 74 insertions(+), 113 deletions(-)
>
> diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c
> index 0fe014c24..14f16a68f 100644
> --- a/drivers/event/octeontx2/otx2_evdev.c
> +++ b/drivers/event/octeontx2/otx2_evdev.c
> @@ -833,10 +833,12 @@ sso_configure_dual_ports(const struct rte_eventdev *event_dev)
> ws->port = i;
> base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
> sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[0], base);
> + ws->base[0] = base;
> vws++;
>
> base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
> sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[1], base);
> + ws->base[1] = base;
> vws++;
>
> gws_cookie = ssogws_get_cookie(ws);
> @@ -909,6 +911,7 @@ sso_configure_ports(const struct rte_eventdev *event_dev)
> ws->port = i;
> base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | i << 12);
> sso_set_port_ops(ws, base);
> + ws->base = base;
>
> gws_cookie = ssogws_get_cookie(ws);
> gws_cookie->event_dev = event_dev;
> @@ -1447,20 +1450,12 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
> ws = event_dev->data->ports[i];
> ssogws_reset((struct otx2_ssogws *)&ws->ws_state[0]);
> ssogws_reset((struct otx2_ssogws *)&ws->ws_state[1]);
> - ws->swtag_req = 0;
> ws->vws = 0;
> - ws->ws_state[0].cur_grp = 0;
> - ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY;
> - ws->ws_state[1].cur_grp = 0;
> - ws->ws_state[1].cur_tt = SSO_SYNC_EMPTY;
> } else {
> struct otx2_ssogws *ws;
>
> ws = event_dev->data->ports[i];
> ssogws_reset(ws);
> - ws->swtag_req = 0;
> - ws->cur_grp = 0;
> - ws->cur_tt = SSO_SYNC_EMPTY;
> }
> }
>
> @@ -1479,8 +1474,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
> otx2_write64(enable, ws->grps_base[i] +
> SSO_LF_GGRP_QCTL);
> }
> - ws->ws_state[0].cur_grp = 0;
> - ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY;
> } else {
> struct otx2_ssogws *ws = event_dev->data->ports[0];
>
> @@ -1492,8 +1485,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
> otx2_write64(enable, ws->grps_base[i] +
> SSO_LF_GGRP_QCTL);
> }
> - ws->cur_grp = 0;
> - ws->cur_tt = SSO_SYNC_EMPTY;
> }
>
> /* reset SSO GWS cache */
> diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h
> index 0513cb81c..e381b9e52 100644
> --- a/drivers/event/octeontx2/otx2_evdev.h
> +++ b/drivers/event/octeontx2/otx2_evdev.h
> @@ -80,6 +80,8 @@
>
> #define OTX2_SSOW_GET_BASE_ADDR(_GW) ((_GW) - SSOW_LF_GWS_OP_GET_WORK)
> #define OTX2_SSOW_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY)
> +#define OTX2_SSOW_GRP_FROM_TAG(x) (((x) >> 36) & 0x3ff)
> +#define OTX2_SSOW_SWTAG_PEND(x) ((x) & BIT_ULL(62))
>
> #define NSEC2USEC(__ns) ((__ns) / 1E3)
> #define USEC2NSEC(__us) ((__us) * 1E3)
> @@ -169,25 +171,23 @@ struct otx2_sso_evdev {
> uintptr_t wqp_op; \
> uintptr_t swtag_flush_op; \
> uintptr_t swtag_norm_op; \
> - uintptr_t swtag_desched_op; \
> - uint8_t cur_tt; \
> - uint8_t cur_grp
> + uintptr_t swtag_desched_op;
>
> /* Event port aka GWS */
> struct otx2_ssogws {
> /* Get Work Fastpath data */
> OTX2_SSOGWS_OPS;
> - uint8_t swtag_req;
> + /* PTP timestamp */
> + struct otx2_timesync_info *tstamp;
> void *lookup_mem;
> uint8_t port;
> /* Add Work Fastpath data */
> uint64_t xaq_lmt __rte_cache_aligned;
> uint64_t *fc_mem;
> uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
> - /* PTP timestamp */
> - struct otx2_timesync_info *tstamp;
> /* Tx Fastpath data */
> - uint8_t tx_adptr_data[] __rte_cache_aligned;
> + uint64_t base __rte_cache_aligned;
> + uint8_t tx_adptr_data[];
> } __rte_cache_aligned;
>
> struct otx2_ssogws_state {
> @@ -197,18 +197,18 @@ struct otx2_ssogws_state {
> struct otx2_ssogws_dual {
> /* Get Work Fastpath data */
> struct otx2_ssogws_state ws_state[2]; /* Ping and Pong */
> - uint8_t swtag_req;
> - uint8_t vws; /* Ping pong bit */
> + /* PTP timestamp */
> + struct otx2_timesync_info *tstamp;
> void *lookup_mem;
> + uint8_t vws; /* Ping pong bit */
> uint8_t port;
> /* Add Work Fastpath data */
> uint64_t xaq_lmt __rte_cache_aligned;
> uint64_t *fc_mem;
> uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
> - /* PTP timestamp */
> - struct otx2_timesync_info *tstamp;
> /* Tx Fastpath data */
> - uint8_t tx_adptr_data[] __rte_cache_aligned;
> + uint64_t base[2] __rte_cache_aligned;
> + uint8_t tx_adptr_data[];
> } __rte_cache_aligned;
>
> static inline struct otx2_sso_evdev *
> diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c
> index b098407e0..7ed836c1e 100644
> --- a/drivers/event/octeontx2/otx2_worker.c
> +++ b/drivers/event/octeontx2/otx2_worker.c
> @@ -25,7 +25,7 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev)
> {
> const uint32_t tag = (uint32_t)ev->event;
> const uint8_t new_tt = ev->sched_type;
> - const uint8_t cur_tt = ws->cur_tt;
> + const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op));
>
> /* 96XX model
> * cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED
> @@ -41,8 +41,6 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev)
> } else {
> otx2_ssogws_swtag_norm(ws, tag, new_tt);
> }
> -
> - ws->swtag_req = 1;
> }
>
> static __rte_always_inline void
> @@ -64,7 +62,7 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev)
> const uint8_t grp = ev->queue_id;
>
> /* Group hasn't changed, Use SWTAG to forward the event */
> - if (ws->cur_grp == grp)
> + if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(ws->tag_op)) == grp)
> otx2_ssogws_fwd_swtag(ws, ev);
> else
> /*
> @@ -75,12 +73,6 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev)
> otx2_ssogws_fwd_group(ws, ev, grp);
> }
>
> -static __rte_always_inline void
> -otx2_ssogws_release_event(struct otx2_ssogws *ws)
> -{
> - otx2_ssogws_swtag_flush(ws);
> -}
> -
> #define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \
> uint16_t __rte_hot \
> otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev, \
> @@ -90,8 +82,7 @@ otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev, \
> \
> RTE_SET_USED(timeout_ticks); \
> \
> - if (ws->swtag_req) { \
> - ws->swtag_req = 0; \
> + if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \
> otx2_ssogws_swtag_wait(ws); \
> return 1; \
> } \
> @@ -117,8 +108,7 @@ otx2_ssogws_deq_timeout_ ##name(void *port, struct rte_event *ev, \
> uint16_t ret = 1; \
> uint64_t iter; \
> \
> - if (ws->swtag_req) { \
> - ws->swtag_req = 0; \
> + if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \
> otx2_ssogws_swtag_wait(ws); \
> return ret; \
> } \
> @@ -149,8 +139,7 @@ otx2_ssogws_deq_seg_ ##name(void *port, struct rte_event *ev, \
> \
> RTE_SET_USED(timeout_ticks); \
> \
> - if (ws->swtag_req) { \
> - ws->swtag_req = 0; \
> + if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \
> otx2_ssogws_swtag_wait(ws); \
> return 1; \
> } \
> @@ -177,8 +166,7 @@ otx2_ssogws_deq_seg_timeout_ ##name(void *port, struct rte_event *ev, \
> uint16_t ret = 1; \
> uint64_t iter; \
> \
> - if (ws->swtag_req) { \
> - ws->swtag_req = 0; \
> + if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \
> otx2_ssogws_swtag_wait(ws); \
> return ret; \
> } \
> @@ -221,7 +209,7 @@ otx2_ssogws_enq(void *port, const struct rte_event *ev)
> otx2_ssogws_forward_event(ws, ev);
> break;
> case RTE_EVENT_OP_RELEASE:
> - otx2_ssogws_release_event(ws);
> + otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op);
> break;
> default:
> return 0;
> @@ -274,14 +262,13 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[], \
> { \
> struct otx2_ssogws *ws = port; \
> uint64_t cmd[sz]; \
> - int i; \
> \
> - for (i = 0; i < nb_events; i++) \
> - otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \
> + RTE_SET_USED(nb_events); \
> + return otx2_ssogws_event_tx(ws->base, &ev[0], cmd, \
> + (const uint64_t \
> (*)[RTE_MAX_QUEUES_PER_PORT]) \
> &ws->tx_adptr_data, \
> flags); \
> - return nb_events; \
> }
> SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
> #undef T
> @@ -293,14 +280,13 @@ otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
> { \
> uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \
> struct otx2_ssogws *ws = port; \
> - int i; \
> \
> - for (i = 0; i < nb_events; i++) \
> - otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \
> + RTE_SET_USED(nb_events); \
> + return otx2_ssogws_event_tx(ws->base, &ev[0], cmd, \
> + (const uint64_t \
> (*)[RTE_MAX_QUEUES_PER_PORT]) \
> &ws->tx_adptr_data, \
> (flags) | NIX_TX_MULTI_SEG_F); \
> - return nb_events; \
> }
> SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
> #undef T
> @@ -335,7 +321,7 @@ ssogws_flush_events(struct otx2_ssogws *ws, uint8_t queue_id, uintptr_t base,
> if (fn != NULL && ev.u64 != 0)
> fn(arg, ev);
> if (ev.sched_type != SSO_TT_EMPTY)
> - otx2_ssogws_swtag_flush(ws);
> + otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op);
> rte_mb();
> aq_cnt = otx2_read64(base + SSO_LF_GGRP_AQ_CNT);
> ds_cnt = otx2_read64(base + SSO_LF_GGRP_MISC_CNT);
> diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
> index 0a7d6671c..2b716c042 100644
> --- a/drivers/event/octeontx2/otx2_worker.h
> +++ b/drivers/event/octeontx2/otx2_worker.h
> @@ -64,8 +64,6 @@ otx2_ssogws_get_work(struct otx2_ssogws *ws, struct rte_event *ev,
> event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
> (event.get_work0 & (0x3FFull << 36)) << 4 |
> (event.get_work0 & 0xffffffff);
> - ws->cur_tt = event.sched_type;
> - ws->cur_grp = event.queue_id;
>
> if (event.sched_type != SSO_TT_EMPTY) {
> if ((flags & NIX_RX_OFFLOAD_SECURITY_F) &&
> @@ -136,8 +134,6 @@ otx2_ssogws_get_work_empty(struct otx2_ssogws *ws, struct rte_event *ev,
> event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
> (event.get_work0 & (0x3FFull << 36)) << 4 |
> (event.get_work0 & 0xffffffff);
> - ws->cur_tt = event.sched_type;
> - ws->cur_grp = event.queue_id;
>
> if (event.sched_type != SSO_TT_EMPTY &&
> event.event_type == RTE_EVENT_TYPE_ETHDEV) {
> @@ -192,18 +188,14 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws)
> {
> otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) +
> SSOW_LF_GWS_OP_SWTAG_UNTAG);
> - ws->cur_tt = SSO_SYNC_UNTAGGED;
> }
>
> static __rte_always_inline void
> -otx2_ssogws_swtag_flush(struct otx2_ssogws *ws)
> +otx2_ssogws_swtag_flush(uint64_t tag_op, uint64_t flush_op)
> {
> - if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) {
> - ws->cur_tt = SSO_SYNC_EMPTY;
> + if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(tag_op)) == SSO_TT_EMPTY)
> return;
> - }
> - otx2_write64(0, ws->swtag_flush_op);
> - ws->cur_tt = SSO_SYNC_EMPTY;
> + otx2_write64(0, flush_op);
> }
>
> static __rte_always_inline void
> @@ -236,7 +228,7 @@ otx2_ssogws_swtag_wait(struct otx2_ssogws *ws)
> }
>
> static __rte_always_inline void
> -otx2_ssogws_head_wait(struct otx2_ssogws *ws)
> +otx2_ssogws_head_wait(uint64_t tag_op)
> {
> #ifdef RTE_ARCH_ARM64
> uint64_t tag;
> @@ -250,11 +242,11 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws)
> " tbz %[tag], 35, rty%= \n"
> "done%=: \n"
> : [tag] "=&r" (tag)
> - : [tag_op] "r" (ws->tag_op)
> + : [tag_op] "r" (tag_op)
> );
> #else
> /* Wait for the HEAD to be set */
> - while (!(otx2_read64(ws->tag_op) & BIT_ULL(35)))
> + while (!(otx2_read64(tag_op) & BIT_ULL(35)))
> ;
> #endif
> }
> @@ -276,8 +268,7 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m,
> }
>
> static __rte_always_inline uint16_t
> -otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
> - uint64_t *cmd,
> +otx2_ssogws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
> const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
> const uint32_t flags)
> {
> @@ -288,7 +279,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
> if ((flags & NIX_TX_OFFLOAD_SECURITY_F) &&
> (m->ol_flags & PKT_TX_SEC_OFFLOAD)) {
> txq = otx2_ssogws_xtract_meta(m, txq_data);
> - return otx2_sec_event_tx(ws, ev, m, txq, flags);
> + return otx2_sec_event_tx(base, ev, m, txq, flags);
> }
>
> /* Perform header writes before barrier for TSO */
> @@ -309,7 +300,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
> m->ol_flags, segdw, flags);
> if (!ev->sched_type) {
> otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
> - otx2_ssogws_head_wait(ws);
> + otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
> if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
> otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr,
> txq->io_addr, segdw);
> @@ -324,7 +315,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
>
> if (!ev->sched_type) {
> otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
> - otx2_ssogws_head_wait(ws);
> + otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
> if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
> otx2_nix_xmit_one(cmd, txq->lmt_addr,
> txq->io_addr, flags);
> @@ -339,7 +330,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
> return 1;
> }
>
> - otx2_ssogws_swtag_flush(ws);
> + otx2_ssogws_swtag_flush(base + SSOW_LF_GWS_TAG,
> + base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
>
> return 1;
> }
> diff --git a/drivers/event/octeontx2/otx2_worker_dual.c b/drivers/event/octeontx2/otx2_worker_dual.c
> index 946488eab..820455788 100644
> --- a/drivers/event/octeontx2/otx2_worker_dual.c
> +++ b/drivers/event/octeontx2/otx2_worker_dual.c
> @@ -26,9 +26,9 @@ static __rte_always_inline void
> otx2_ssogws_dual_fwd_swtag(struct otx2_ssogws_state *ws,
> const struct rte_event *ev)
> {
> + const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op));
> const uint32_t tag = (uint32_t)ev->event;
> const uint8_t new_tt = ev->sched_type;
> - const uint8_t cur_tt = ws->cur_tt;
>
> /* 96XX model
> * cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED
> @@ -59,22 +59,20 @@ otx2_ssogws_dual_fwd_group(struct otx2_ssogws_state *ws,
> }
>
> static __rte_always_inline void
> -otx2_ssogws_dual_forward_event(struct otx2_ssogws_dual *ws,
> - struct otx2_ssogws_state *vws,
> +otx2_ssogws_dual_forward_event(struct otx2_ssogws_state *vws,
> const struct rte_event *ev)
> {
> const uint8_t grp = ev->queue_id;
>
> /* Group hasn't changed, Use SWTAG to forward the event */
> - if (vws->cur_grp == grp) {
> + if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(vws->tag_op)) == grp) {
> otx2_ssogws_dual_fwd_swtag(vws, ev);
> - ws->swtag_req = 1;
> } else {
> - /*
> - * Group has been changed for group based work pipelining,
> - * Use deschedule/add_work operation to transfer the event to
> - * new group/core
> - */
> + /*
> + * Group has been changed for group based work pipelining,
> + * Use deschedule/add_work operation to transfer the event to
> + * new group/core
> + */
> otx2_ssogws_dual_fwd_group(vws, ev, grp);
> }
> }
> @@ -90,10 +88,10 @@ otx2_ssogws_dual_enq(void *port, const struct rte_event *ev)
> rte_smp_mb();
> return otx2_ssogws_dual_new_event(ws, ev);
> case RTE_EVENT_OP_FORWARD:
> - otx2_ssogws_dual_forward_event(ws, vws, ev);
> + otx2_ssogws_dual_forward_event(vws, ev);
> break;
> case RTE_EVENT_OP_RELEASE:
> - otx2_ssogws_swtag_flush((struct otx2_ssogws *)vws);
> + otx2_ssogws_swtag_flush(vws->tag_op, vws->swtag_flush_op);
> break;
> default:
> return 0;
> @@ -135,7 +133,7 @@ otx2_ssogws_dual_enq_fwd_burst(void *port, const struct rte_event ev[],
> struct otx2_ssogws_state *vws = &ws->ws_state[!ws->vws];
>
> RTE_SET_USED(nb_events);
> - otx2_ssogws_dual_forward_event(ws, vws, ev);
> + otx2_ssogws_dual_forward_event(vws, ev);
>
> return 1;
> }
> @@ -150,10 +148,10 @@ otx2_ssogws_dual_deq_ ##name(void *port, struct rte_event *ev, \
> \
> rte_prefetch_non_temporal(ws); \
> RTE_SET_USED(timeout_ticks); \
> - if (ws->swtag_req) { \
> + if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \
> + ws->ws_state[!ws->vws].tag_op))) { \
> otx2_ssogws_swtag_wait((struct otx2_ssogws *) \
> &ws->ws_state[!ws->vws]); \
> - ws->swtag_req = 0; \
> return 1; \
> } \
> \
> @@ -184,10 +182,10 @@ otx2_ssogws_dual_deq_timeout_ ##name(void *port, struct rte_event *ev, \
> uint64_t iter; \
> uint8_t gw; \
> \
> - if (ws->swtag_req) { \
> + if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \
> + ws->ws_state[!ws->vws].tag_op))) { \
> otx2_ssogws_swtag_wait((struct otx2_ssogws *) \
> &ws->ws_state[!ws->vws]); \
> - ws->swtag_req = 0; \
> return 1; \
> } \
> \
> @@ -228,10 +226,10 @@ otx2_ssogws_dual_deq_seg_ ##name(void *port, struct rte_event *ev, \
> uint8_t gw; \
> \
> RTE_SET_USED(timeout_ticks); \
> - if (ws->swtag_req) { \
> + if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \
> + ws->ws_state[!ws->vws].tag_op))) { \
> otx2_ssogws_swtag_wait((struct otx2_ssogws *) \
> &ws->ws_state[!ws->vws]); \
> - ws->swtag_req = 0; \
> return 1; \
> } \
> \
> @@ -266,10 +264,10 @@ otx2_ssogws_dual_deq_seg_timeout_ ##name(void *port, \
> uint64_t iter; \
> uint8_t gw; \
> \
> - if (ws->swtag_req) { \
> + if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \
> + ws->ws_state[!ws->vws].tag_op))) { \
> otx2_ssogws_swtag_wait((struct otx2_ssogws *) \
> &ws->ws_state[!ws->vws]); \
> - ws->swtag_req = 0; \
> return 1; \
> } \
> \
> @@ -314,15 +312,13 @@ otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port, \
> uint16_t nb_events) \
> { \
> struct otx2_ssogws_dual *ws = port; \
> - struct otx2_ssogws *vws = \
> - (struct otx2_ssogws *)&ws->ws_state[!ws->vws]; \
> uint64_t cmd[sz]; \
> \
> RTE_SET_USED(nb_events); \
> - return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t \
> - (*)[RTE_MAX_QUEUES_PER_PORT]) \
> - ws->tx_adptr_data, \
> - flags); \
> + return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0], \
> + cmd, (const uint64_t \
> + (*)[RTE_MAX_QUEUES_PER_PORT]) \
> + &ws->tx_adptr_data, flags); \
> }
> SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
> #undef T
> @@ -333,16 +329,15 @@ otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port, \
> struct rte_event ev[], \
> uint16_t nb_events) \
> { \
> - struct otx2_ssogws_dual *ws = port; \
> - struct otx2_ssogws *vws = \
> - (struct otx2_ssogws *)&ws->ws_state[!ws->vws]; \
> uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \
> + struct otx2_ssogws_dual *ws = port; \
> \
> RTE_SET_USED(nb_events); \
> - return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t \
> - (*)[RTE_MAX_QUEUES_PER_PORT]) \
> - ws->tx_adptr_data, \
> - (flags) | NIX_TX_MULTI_SEG_F); \
> + return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0], \
> + cmd, (const uint64_t \
> + (*)[RTE_MAX_QUEUES_PER_PORT]) \
> + &ws->tx_adptr_data, \
> + (flags) | NIX_TX_MULTI_SEG_F);\
> }
> SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
> #undef T
> diff --git a/drivers/event/octeontx2/otx2_worker_dual.h b/drivers/event/octeontx2/otx2_worker_dual.h
> index 6e6061821..72b616439 100644
> --- a/drivers/event/octeontx2/otx2_worker_dual.h
> +++ b/drivers/event/octeontx2/otx2_worker_dual.h
> @@ -61,8 +61,6 @@ otx2_ssogws_dual_get_work(struct otx2_ssogws_state *ws,
> event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
> (event.get_work0 & (0x3FFull << 36)) << 4 |
> (event.get_work0 & 0xffffffff);
> - ws->cur_tt = event.sched_type;
> - ws->cur_grp = event.queue_id;
>
> if (event.sched_type != SSO_TT_EMPTY) {
> if ((flags & NIX_RX_OFFLOAD_SECURITY_F) &&
> diff --git a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
> index 284bcd536..c8eae3d62 100644
> --- a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
> +++ b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h
> @@ -30,12 +30,11 @@ otx2_ipsec_fp_out_rlen_get(struct otx2_sec_session_ipsec_ip *sess,
> }
>
> static __rte_always_inline void
> -otx2_ssogws_head_wait(struct otx2_ssogws *ws);
> +otx2_ssogws_head_wait(uint64_t base);
>
> static __rte_always_inline int
> -otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
> - struct rte_mbuf *m, const struct otx2_eth_txq *txq,
> - const uint32_t offload_flags)
> +otx2_sec_event_tx(uint64_t base, struct rte_event *ev, struct rte_mbuf *m,
> + const struct otx2_eth_txq *txq, const uint32_t offload_flags)
> {
> uint32_t dlen, rlen, desc_headroom, extend_head, extend_tail;
> struct otx2_sec_session_ipsec_ip *sess;
> @@ -149,7 +148,7 @@ otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
> __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
>
> if (!ev->sched_type)
> - otx2_ssogws_head_wait(ws);
> + otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
>
> inst.param1 = sess->esn_hi >> 16;
> inst.param2 = sess->esn_hi & 0xffff;
> --
> 2.17.1
>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2021-01-26 9:58 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-22 21:18 [dpdk-dev] [PATCH] event/octeontx2: enhance Tx path cache locality pbhagavatula
2021-01-12 8:39 ` [dpdk-dev] [PATCH v2] " pbhagavatula
2021-01-26 9:57 ` [dpdk-dev] [PATCH] " Jerin Jacob
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).