From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enhance Tx path cache locality and remove the current tag type and group stores from the datapath to conserve store buffers. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/octeontx2/otx2_evdev.c | 15 ++---- drivers/event/octeontx2/otx2_evdev.h | 24 ++++----- drivers/event/octeontx2/otx2_worker.c | 42 +++++---------- drivers/event/octeontx2/otx2_worker.h | 32 +++++------ drivers/event/octeontx2/otx2_worker_dual.c | 63 ++++++++++------------ drivers/event/octeontx2/otx2_worker_dual.h | 2 - drivers/net/octeontx2/otx2_ethdev_sec_tx.h | 9 ++-- 7 files changed, 74 insertions(+), 113 deletions(-) diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c index 0fe014c24..14f16a68f 100644 --- a/drivers/event/octeontx2/otx2_evdev.c +++ b/drivers/event/octeontx2/otx2_evdev.c @@ -833,10 +833,12 @@ sso_configure_dual_ports(const struct rte_eventdev *event_dev) ws->port = i; base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12); sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[0], base); + ws->base[0] = base; vws++; base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12); sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[1], base); + ws->base[1] = base; vws++; gws_cookie = ssogws_get_cookie(ws); @@ -909,6 +911,7 @@ sso_configure_ports(const struct rte_eventdev *event_dev) ws->port = i; base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | i << 12); sso_set_port_ops(ws, base); + ws->base = base; gws_cookie = ssogws_get_cookie(ws); gws_cookie->event_dev = event_dev; @@ -1447,20 +1450,12 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable) ws = event_dev->data->ports[i]; ssogws_reset((struct otx2_ssogws *)&ws->ws_state[0]); ssogws_reset((struct otx2_ssogws *)&ws->ws_state[1]); - ws->swtag_req = 0; ws->vws = 0; - ws->ws_state[0].cur_grp = 0; - ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY; - ws->ws_state[1].cur_grp = 0; - ws->ws_state[1].cur_tt = SSO_SYNC_EMPTY; } else { 
struct otx2_ssogws *ws; ws = event_dev->data->ports[i]; ssogws_reset(ws); - ws->swtag_req = 0; - ws->cur_grp = 0; - ws->cur_tt = SSO_SYNC_EMPTY; } } @@ -1479,8 +1474,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable) otx2_write64(enable, ws->grps_base[i] + SSO_LF_GGRP_QCTL); } - ws->ws_state[0].cur_grp = 0; - ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY; } else { struct otx2_ssogws *ws = event_dev->data->ports[0]; @@ -1492,8 +1485,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable) otx2_write64(enable, ws->grps_base[i] + SSO_LF_GGRP_QCTL); } - ws->cur_grp = 0; - ws->cur_tt = SSO_SYNC_EMPTY; } /* reset SSO GWS cache */ diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h index 0513cb81c..e381b9e52 100644 --- a/drivers/event/octeontx2/otx2_evdev.h +++ b/drivers/event/octeontx2/otx2_evdev.h @@ -80,6 +80,8 @@ #define OTX2_SSOW_GET_BASE_ADDR(_GW) ((_GW) - SSOW_LF_GWS_OP_GET_WORK) #define OTX2_SSOW_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) +#define OTX2_SSOW_GRP_FROM_TAG(x) (((x) >> 36) & 0x3ff) +#define OTX2_SSOW_SWTAG_PEND(x) ((x) & BIT_ULL(62)) #define NSEC2USEC(__ns) ((__ns) / 1E3) #define USEC2NSEC(__us) ((__us) * 1E3) @@ -169,25 +171,23 @@ struct otx2_sso_evdev { uintptr_t wqp_op; \ uintptr_t swtag_flush_op; \ uintptr_t swtag_norm_op; \ - uintptr_t swtag_desched_op; \ - uint8_t cur_tt; \ - uint8_t cur_grp + uintptr_t swtag_desched_op; /* Event port aka GWS */ struct otx2_ssogws { /* Get Work Fastpath data */ OTX2_SSOGWS_OPS; - uint8_t swtag_req; + /* PTP timestamp */ + struct otx2_timesync_info *tstamp; void *lookup_mem; uint8_t port; /* Add Work Fastpath data */ uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[OTX2_SSO_MAX_VHGRP]; - /* PTP timestamp */ - struct otx2_timesync_info *tstamp; /* Tx Fastpath data */ - uint8_t tx_adptr_data[] __rte_cache_aligned; + uint64_t base __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct otx2_ssogws_state { @@ 
-197,18 +197,18 @@ struct otx2_ssogws_state { struct otx2_ssogws_dual { /* Get Work Fastpath data */ struct otx2_ssogws_state ws_state[2]; /* Ping and Pong */ - uint8_t swtag_req; - uint8_t vws; /* Ping pong bit */ + /* PTP timestamp */ + struct otx2_timesync_info *tstamp; void *lookup_mem; + uint8_t vws; /* Ping pong bit */ uint8_t port; /* Add Work Fastpath data */ uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[OTX2_SSO_MAX_VHGRP]; - /* PTP timestamp */ - struct otx2_timesync_info *tstamp; /* Tx Fastpath data */ - uint8_t tx_adptr_data[] __rte_cache_aligned; + uint64_t base[2] __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; static inline struct otx2_sso_evdev * diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c index b098407e0..7ed836c1e 100644 --- a/drivers/event/octeontx2/otx2_worker.c +++ b/drivers/event/octeontx2/otx2_worker.c @@ -25,7 +25,7 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev) { const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - const uint8_t cur_tt = ws->cur_tt; + const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)); /* 96XX model * cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED @@ -41,8 +41,6 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev) } else { otx2_ssogws_swtag_norm(ws, tag, new_tt); } - - ws->swtag_req = 1; } static __rte_always_inline void @@ -64,7 +62,7 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev) const uint8_t grp = ev->queue_id; /* Group hasn't changed, Use SWTAG to forward the event */ - if (ws->cur_grp == grp) + if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(ws->tag_op)) == grp) otx2_ssogws_fwd_swtag(ws, ev); else /* @@ -75,12 +73,6 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev) otx2_ssogws_fwd_group(ws, ev, grp); } -static __rte_always_inline void 
-otx2_ssogws_release_event(struct otx2_ssogws *ws) -{ - otx2_ssogws_swtag_flush(ws); -} - #define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ uint16_t __rte_hot \ otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev, \ @@ -90,8 +82,7 @@ otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev, \ \ RTE_SET_USED(timeout_ticks); \ \ - if (ws->swtag_req) { \ - ws->swtag_req = 0; \ + if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \ otx2_ssogws_swtag_wait(ws); \ return 1; \ } \ @@ -117,8 +108,7 @@ otx2_ssogws_deq_timeout_ ##name(void *port, struct rte_event *ev, \ uint16_t ret = 1; \ uint64_t iter; \ \ - if (ws->swtag_req) { \ - ws->swtag_req = 0; \ + if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \ otx2_ssogws_swtag_wait(ws); \ return ret; \ } \ @@ -149,8 +139,7 @@ otx2_ssogws_deq_seg_ ##name(void *port, struct rte_event *ev, \ \ RTE_SET_USED(timeout_ticks); \ \ - if (ws->swtag_req) { \ - ws->swtag_req = 0; \ + if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \ otx2_ssogws_swtag_wait(ws); \ return 1; \ } \ @@ -177,8 +166,7 @@ otx2_ssogws_deq_seg_timeout_ ##name(void *port, struct rte_event *ev, \ uint16_t ret = 1; \ uint64_t iter; \ \ - if (ws->swtag_req) { \ - ws->swtag_req = 0; \ + if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \ otx2_ssogws_swtag_wait(ws); \ return ret; \ } \ @@ -221,7 +209,7 @@ otx2_ssogws_enq(void *port, const struct rte_event *ev) otx2_ssogws_forward_event(ws, ev); break; case RTE_EVENT_OP_RELEASE: - otx2_ssogws_release_event(ws); + otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op); break; default: return 0; @@ -274,14 +262,13 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[], \ { \ struct otx2_ssogws *ws = port; \ uint64_t cmd[sz]; \ - int i; \ \ - for (i = 0; i < nb_events; i++) \ - otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \ + RTE_SET_USED(nb_events); \ + return otx2_ssogws_event_tx(ws->base, &ev[0], cmd, \ + (const uint64_t \ (*)[RTE_MAX_QUEUES_PER_PORT]) \ &ws->tx_adptr_data, \ 
flags); \ - return nb_events; \ } SSO_TX_ADPTR_ENQ_FASTPATH_FUNC #undef T @@ -293,14 +280,13 @@ otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\ { \ uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \ struct otx2_ssogws *ws = port; \ - int i; \ \ - for (i = 0; i < nb_events; i++) \ - otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \ + RTE_SET_USED(nb_events); \ + return otx2_ssogws_event_tx(ws->base, &ev[0], cmd, \ + (const uint64_t \ (*)[RTE_MAX_QUEUES_PER_PORT]) \ &ws->tx_adptr_data, \ (flags) | NIX_TX_MULTI_SEG_F); \ - return nb_events; \ } SSO_TX_ADPTR_ENQ_FASTPATH_FUNC #undef T @@ -335,7 +321,7 @@ ssogws_flush_events(struct otx2_ssogws *ws, uint8_t queue_id, uintptr_t base, if (fn != NULL && ev.u64 != 0) fn(arg, ev); if (ev.sched_type != SSO_TT_EMPTY) - otx2_ssogws_swtag_flush(ws); + otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op); rte_mb(); aq_cnt = otx2_read64(base + SSO_LF_GGRP_AQ_CNT); ds_cnt = otx2_read64(base + SSO_LF_GGRP_MISC_CNT); diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h index 0a7d6671c..2b716c042 100644 --- a/drivers/event/octeontx2/otx2_worker.h +++ b/drivers/event/octeontx2/otx2_worker.h @@ -64,8 +64,6 @@ otx2_ssogws_get_work(struct otx2_ssogws *ws, struct rte_event *ev, event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 | (event.get_work0 & (0x3FFull << 36)) << 4 | (event.get_work0 & 0xffffffff); - ws->cur_tt = event.sched_type; - ws->cur_grp = event.queue_id; if (event.sched_type != SSO_TT_EMPTY) { if ((flags & NIX_RX_OFFLOAD_SECURITY_F) && @@ -136,8 +134,6 @@ otx2_ssogws_get_work_empty(struct otx2_ssogws *ws, struct rte_event *ev, event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 | (event.get_work0 & (0x3FFull << 36)) << 4 | (event.get_work0 & 0xffffffff); - ws->cur_tt = event.sched_type; - ws->cur_grp = event.queue_id; if (event.sched_type != SSO_TT_EMPTY && event.event_type == RTE_EVENT_TYPE_ETHDEV) { @@ -192,18 +188,14 @@ 
otx2_ssogws_swtag_untag(struct otx2_ssogws *ws) { otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) + SSOW_LF_GWS_OP_SWTAG_UNTAG); - ws->cur_tt = SSO_SYNC_UNTAGGED; } static __rte_always_inline void -otx2_ssogws_swtag_flush(struct otx2_ssogws *ws) +otx2_ssogws_swtag_flush(uint64_t tag_op, uint64_t flush_op) { - if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) { - ws->cur_tt = SSO_SYNC_EMPTY; + if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(tag_op)) == SSO_TT_EMPTY) return; - } - otx2_write64(0, ws->swtag_flush_op); - ws->cur_tt = SSO_SYNC_EMPTY; + otx2_write64(0, flush_op); } static __rte_always_inline void @@ -236,7 +228,7 @@ otx2_ssogws_swtag_wait(struct otx2_ssogws *ws) } static __rte_always_inline void -otx2_ssogws_head_wait(struct otx2_ssogws *ws) +otx2_ssogws_head_wait(uint64_t tag_op) { #ifdef RTE_ARCH_ARM64 uint64_t tag; @@ -250,11 +242,11 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws) " tbz %[tag], 35, rty%= \n" "done%=: \n" : [tag] "=&r" (tag) - : [tag_op] "r" (ws->tag_op) + : [tag_op] "r" (tag_op) ); #else /* Wait for the HEAD to be set */ - while (!(otx2_read64(ws->tag_op) & BIT_ULL(35))) + while (!(otx2_read64(tag_op) & BIT_ULL(35))) ; #endif } @@ -276,8 +268,7 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m, } static __rte_always_inline uint16_t -otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, - uint64_t *cmd, +otx2_ssogws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], const uint32_t flags) { @@ -288,7 +279,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, if ((flags & NIX_TX_OFFLOAD_SECURITY_F) && (m->ol_flags & PKT_TX_SEC_OFFLOAD)) { txq = otx2_ssogws_xtract_meta(m, txq_data); - return otx2_sec_event_tx(ws, ev, m, txq, flags); + return otx2_sec_event_tx(base, ev, m, txq, flags); } /* Perform header writes before barrier for TSO */ @@ -309,7 +300,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, 
struct rte_event *ev, m->ol_flags, segdw, flags); if (!ev->sched_type) { otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); - otx2_ssogws_head_wait(ws); + otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG); if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, segdw); @@ -324,7 +315,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, if (!ev->sched_type) { otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); - otx2_ssogws_head_wait(ws); + otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG); if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags); @@ -339,7 +330,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, return 1; } - otx2_ssogws_swtag_flush(ws); + otx2_ssogws_swtag_flush(base + SSOW_LF_GWS_TAG, + base + SSOW_LF_GWS_OP_SWTAG_FLUSH); return 1; } diff --git a/drivers/event/octeontx2/otx2_worker_dual.c b/drivers/event/octeontx2/otx2_worker_dual.c index 946488eab..820455788 100644 --- a/drivers/event/octeontx2/otx2_worker_dual.c +++ b/drivers/event/octeontx2/otx2_worker_dual.c @@ -26,9 +26,9 @@ static __rte_always_inline void otx2_ssogws_dual_fwd_swtag(struct otx2_ssogws_state *ws, const struct rte_event *ev) { + const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)); const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - const uint8_t cur_tt = ws->cur_tt; /* 96XX model * cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED @@ -59,22 +59,20 @@ otx2_ssogws_dual_fwd_group(struct otx2_ssogws_state *ws, } static __rte_always_inline void -otx2_ssogws_dual_forward_event(struct otx2_ssogws_dual *ws, - struct otx2_ssogws_state *vws, +otx2_ssogws_dual_forward_event(struct otx2_ssogws_state *vws, const struct rte_event *ev) { const uint8_t grp = ev->queue_id; /* Group hasn't changed, Use SWTAG to forward the event */ - if (vws->cur_grp == grp) { + if 
(OTX2_SSOW_GRP_FROM_TAG(otx2_read64(vws->tag_op)) == grp) { otx2_ssogws_dual_fwd_swtag(vws, ev); - ws->swtag_req = 1; } else { - /* - * Group has been changed for group based work pipelining, - * Use deschedule/add_work operation to transfer the event to - * new group/core - */ + /* + * Group has been changed for group based work pipelining, + * Use deschedule/add_work operation to transfer the event to + * new group/core + */ otx2_ssogws_dual_fwd_group(vws, ev, grp); } } @@ -90,10 +88,10 @@ otx2_ssogws_dual_enq(void *port, const struct rte_event *ev) rte_smp_mb(); return otx2_ssogws_dual_new_event(ws, ev); case RTE_EVENT_OP_FORWARD: - otx2_ssogws_dual_forward_event(ws, vws, ev); + otx2_ssogws_dual_forward_event(vws, ev); break; case RTE_EVENT_OP_RELEASE: - otx2_ssogws_swtag_flush((struct otx2_ssogws *)vws); + otx2_ssogws_swtag_flush(vws->tag_op, vws->swtag_flush_op); break; default: return 0; @@ -135,7 +133,7 @@ otx2_ssogws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], struct otx2_ssogws_state *vws = &ws->ws_state[!ws->vws]; RTE_SET_USED(nb_events); - otx2_ssogws_dual_forward_event(ws, vws, ev); + otx2_ssogws_dual_forward_event(vws, ev); return 1; } @@ -150,10 +148,10 @@ otx2_ssogws_dual_deq_ ##name(void *port, struct rte_event *ev, \ \ rte_prefetch_non_temporal(ws); \ RTE_SET_USED(timeout_ticks); \ - if (ws->swtag_req) { \ + if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \ + ws->ws_state[!ws->vws].tag_op))) { \ otx2_ssogws_swtag_wait((struct otx2_ssogws *) \ &ws->ws_state[!ws->vws]); \ - ws->swtag_req = 0; \ return 1; \ } \ \ @@ -184,10 +182,10 @@ otx2_ssogws_dual_deq_timeout_ ##name(void *port, struct rte_event *ev, \ uint64_t iter; \ uint8_t gw; \ \ - if (ws->swtag_req) { \ + if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \ + ws->ws_state[!ws->vws].tag_op))) { \ otx2_ssogws_swtag_wait((struct otx2_ssogws *) \ &ws->ws_state[!ws->vws]); \ - ws->swtag_req = 0; \ return 1; \ } \ \ @@ -228,10 +226,10 @@ otx2_ssogws_dual_deq_seg_ ##name(void *port, struct rte_event *ev, 
\ uint8_t gw; \ \ RTE_SET_USED(timeout_ticks); \ - if (ws->swtag_req) { \ + if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \ + ws->ws_state[!ws->vws].tag_op))) { \ otx2_ssogws_swtag_wait((struct otx2_ssogws *) \ &ws->ws_state[!ws->vws]); \ - ws->swtag_req = 0; \ return 1; \ } \ \ @@ -266,10 +264,10 @@ otx2_ssogws_dual_deq_seg_timeout_ ##name(void *port, \ uint64_t iter; \ uint8_t gw; \ \ - if (ws->swtag_req) { \ + if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \ + ws->ws_state[!ws->vws].tag_op))) { \ otx2_ssogws_swtag_wait((struct otx2_ssogws *) \ &ws->ws_state[!ws->vws]); \ - ws->swtag_req = 0; \ return 1; \ } \ \ @@ -314,15 +312,13 @@ otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port, \ uint16_t nb_events) \ { \ struct otx2_ssogws_dual *ws = port; \ - struct otx2_ssogws *vws = \ - (struct otx2_ssogws *)&ws->ws_state[!ws->vws]; \ uint64_t cmd[sz]; \ \ RTE_SET_USED(nb_events); \ - return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t \ - (*)[RTE_MAX_QUEUES_PER_PORT]) \ - ws->tx_adptr_data, \ - flags); \ + return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0], \ + cmd, (const uint64_t \ + (*)[RTE_MAX_QUEUES_PER_PORT]) \ + &ws->tx_adptr_data, flags); \ } SSO_TX_ADPTR_ENQ_FASTPATH_FUNC #undef T @@ -333,16 +329,15 @@ otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port, \ struct rte_event ev[], \ uint16_t nb_events) \ { \ - struct otx2_ssogws_dual *ws = port; \ - struct otx2_ssogws *vws = \ - (struct otx2_ssogws *)&ws->ws_state[!ws->vws]; \ uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct otx2_ssogws_dual *ws = port; \ \ RTE_SET_USED(nb_events); \ - return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t \ - (*)[RTE_MAX_QUEUES_PER_PORT]) \ - ws->tx_adptr_data, \ - (flags) | NIX_TX_MULTI_SEG_F); \ + return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0], \ + cmd, (const uint64_t \ + (*)[RTE_MAX_QUEUES_PER_PORT]) \ + &ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F);\ } SSO_TX_ADPTR_ENQ_FASTPATH_FUNC #undef T diff --git a/drivers/event/octeontx2/otx2_worker_dual.h 
b/drivers/event/octeontx2/otx2_worker_dual.h index 6e6061821..72b616439 100644 --- a/drivers/event/octeontx2/otx2_worker_dual.h +++ b/drivers/event/octeontx2/otx2_worker_dual.h @@ -61,8 +61,6 @@ otx2_ssogws_dual_get_work(struct otx2_ssogws_state *ws, event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 | (event.get_work0 & (0x3FFull << 36)) << 4 | (event.get_work0 & 0xffffffff); - ws->cur_tt = event.sched_type; - ws->cur_grp = event.queue_id; if (event.sched_type != SSO_TT_EMPTY) { if ((flags & NIX_RX_OFFLOAD_SECURITY_F) && diff --git a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h index 284bcd536..c8eae3d62 100644 --- a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h +++ b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h @@ -30,12 +30,11 @@ otx2_ipsec_fp_out_rlen_get(struct otx2_sec_session_ipsec_ip *sess, } static __rte_always_inline void -otx2_ssogws_head_wait(struct otx2_ssogws *ws); +otx2_ssogws_head_wait(uint64_t base); static __rte_always_inline int -otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, - struct rte_mbuf *m, const struct otx2_eth_txq *txq, - const uint32_t offload_flags) +otx2_sec_event_tx(uint64_t base, struct rte_event *ev, struct rte_mbuf *m, + const struct otx2_eth_txq *txq, const uint32_t offload_flags) { uint32_t dlen, rlen, desc_headroom, extend_head, extend_tail; struct otx2_sec_session_ipsec_ip *sess; @@ -149,7 +148,7 @@ otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, __mempool_check_cookies(m->pool, (void **)&m, 1, 0); if (!ev->sched_type) - otx2_ssogws_head_wait(ws); + otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG); inst.param1 = sess->esn_hi >> 16; inst.param2 = sess->esn_hi & 0xffff; -- 2.17.1
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Enhance Tx path cache locality and remove the current tag type and group stores from the datapath to conserve store buffers. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- - SWTAG can complete asynchronously, so swtag_req must still be maintained in the ws data structure. drivers/event/octeontx2/otx2_evdev.c | 13 ++------ drivers/event/octeontx2/otx2_evdev.h | 23 +++++++------ drivers/event/octeontx2/otx2_worker.c | 28 ++++++---------- drivers/event/octeontx2/otx2_worker.h | 32 +++++++----------- drivers/event/octeontx2/otx2_worker_dual.c | 39 ++++++++++------------ drivers/event/octeontx2/otx2_worker_dual.h | 2 -- drivers/net/octeontx2/otx2_ethdev_sec_tx.h | 9 +++-- 7 files changed, 59 insertions(+), 87 deletions(-) diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c index 0fe014c24..80a786f21 100644 --- a/drivers/event/octeontx2/otx2_evdev.c +++ b/drivers/event/octeontx2/otx2_evdev.c @@ -833,10 +833,12 @@ sso_configure_dual_ports(const struct rte_eventdev *event_dev) ws->port = i; base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12); sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[0], base); + ws->base[0] = base; vws++; base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12); sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[1], base); + ws->base[1] = base; vws++; gws_cookie = ssogws_get_cookie(ws); @@ -909,6 +911,7 @@ sso_configure_ports(const struct rte_eventdev *event_dev) ws->port = i; base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | i << 12); sso_set_port_ops(ws, base); + ws->base = base; gws_cookie = ssogws_get_cookie(ws); gws_cookie->event_dev = event_dev; @@ -1449,18 +1452,12 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable) ssogws_reset((struct otx2_ssogws *)&ws->ws_state[1]); ws->swtag_req = 0; ws->vws = 0; - ws->ws_state[0].cur_grp = 0; - ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY; - ws->ws_state[1].cur_grp = 0; - ws->ws_state[1].cur_tt = 
SSO_SYNC_EMPTY; } else { struct otx2_ssogws *ws; ws = event_dev->data->ports[i]; ssogws_reset(ws); ws->swtag_req = 0; - ws->cur_grp = 0; - ws->cur_tt = SSO_SYNC_EMPTY; } } @@ -1479,8 +1476,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable) otx2_write64(enable, ws->grps_base[i] + SSO_LF_GGRP_QCTL); } - ws->ws_state[0].cur_grp = 0; - ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY; } else { struct otx2_ssogws *ws = event_dev->data->ports[0]; @@ -1492,8 +1487,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable) otx2_write64(enable, ws->grps_base[i] + SSO_LF_GGRP_QCTL); } - ws->cur_grp = 0; - ws->cur_tt = SSO_SYNC_EMPTY; } /* reset SSO GWS cache */ diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h index 0513cb81c..ed9cbc86b 100644 --- a/drivers/event/octeontx2/otx2_evdev.h +++ b/drivers/event/octeontx2/otx2_evdev.h @@ -80,6 +80,7 @@ #define OTX2_SSOW_GET_BASE_ADDR(_GW) ((_GW) - SSOW_LF_GWS_OP_GET_WORK) #define OTX2_SSOW_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) +#define OTX2_SSOW_GRP_FROM_TAG(x) (((x) >> 36) & 0x3ff) #define NSEC2USEC(__ns) ((__ns) / 1E3) #define USEC2NSEC(__us) ((__us) * 1E3) @@ -169,25 +170,24 @@ struct otx2_sso_evdev { uintptr_t wqp_op; \ uintptr_t swtag_flush_op; \ uintptr_t swtag_norm_op; \ - uintptr_t swtag_desched_op; \ - uint8_t cur_tt; \ - uint8_t cur_grp + uintptr_t swtag_desched_op; /* Event port aka GWS */ struct otx2_ssogws { /* Get Work Fastpath data */ OTX2_SSOGWS_OPS; - uint8_t swtag_req; + /* PTP timestamp */ + struct otx2_timesync_info *tstamp; void *lookup_mem; + uint8_t swtag_req; uint8_t port; /* Add Work Fastpath data */ uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[OTX2_SSO_MAX_VHGRP]; - /* PTP timestamp */ - struct otx2_timesync_info *tstamp; /* Tx Fastpath data */ - uint8_t tx_adptr_data[] __rte_cache_aligned; + uint64_t base __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct otx2_ssogws_state { @@ -197,18 +197,19 
@@ struct otx2_ssogws_state { struct otx2_ssogws_dual { /* Get Work Fastpath data */ struct otx2_ssogws_state ws_state[2]; /* Ping and Pong */ + /* PTP timestamp */ + struct otx2_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t vws; /* Ping pong bit */ - void *lookup_mem; uint8_t port; /* Add Work Fastpath data */ uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[OTX2_SSO_MAX_VHGRP]; - /* PTP timestamp */ - struct otx2_timesync_info *tstamp; /* Tx Fastpath data */ - uint8_t tx_adptr_data[] __rte_cache_aligned; + uint64_t base[2] __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; static inline struct otx2_sso_evdev * diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c index b098407e0..95139d27a 100644 --- a/drivers/event/octeontx2/otx2_worker.c +++ b/drivers/event/octeontx2/otx2_worker.c @@ -25,7 +25,7 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev) { const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - const uint8_t cur_tt = ws->cur_tt; + const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)); /* 96XX model * cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED @@ -64,7 +64,7 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev) const uint8_t grp = ev->queue_id; /* Group hasn't changed, Use SWTAG to forward the event */ - if (ws->cur_grp == grp) + if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(ws->tag_op)) == grp) otx2_ssogws_fwd_swtag(ws, ev); else /* @@ -75,12 +75,6 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev) otx2_ssogws_fwd_group(ws, ev, grp); } -static __rte_always_inline void -otx2_ssogws_release_event(struct otx2_ssogws *ws) -{ - otx2_ssogws_swtag_flush(ws); -} - #define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ uint16_t __rte_hot \ otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev, \ @@ -221,7 +215,7 @@ 
otx2_ssogws_enq(void *port, const struct rte_event *ev) otx2_ssogws_forward_event(ws, ev); break; case RTE_EVENT_OP_RELEASE: - otx2_ssogws_release_event(ws); + otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op); break; default: return 0; @@ -274,14 +268,13 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[], \ { \ struct otx2_ssogws *ws = port; \ uint64_t cmd[sz]; \ - int i; \ \ - for (i = 0; i < nb_events; i++) \ - otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \ + RTE_SET_USED(nb_events); \ + return otx2_ssogws_event_tx(ws->base, &ev[0], cmd, \ + (const uint64_t \ (*)[RTE_MAX_QUEUES_PER_PORT]) \ &ws->tx_adptr_data, \ flags); \ - return nb_events; \ } SSO_TX_ADPTR_ENQ_FASTPATH_FUNC #undef T @@ -293,14 +286,13 @@ otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\ { \ uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \ struct otx2_ssogws *ws = port; \ - int i; \ \ - for (i = 0; i < nb_events; i++) \ - otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \ + RTE_SET_USED(nb_events); \ + return otx2_ssogws_event_tx(ws->base, &ev[0], cmd, \ + (const uint64_t \ (*)[RTE_MAX_QUEUES_PER_PORT]) \ &ws->tx_adptr_data, \ (flags) | NIX_TX_MULTI_SEG_F); \ - return nb_events; \ } SSO_TX_ADPTR_ENQ_FASTPATH_FUNC #undef T @@ -335,7 +327,7 @@ ssogws_flush_events(struct otx2_ssogws *ws, uint8_t queue_id, uintptr_t base, if (fn != NULL && ev.u64 != 0) fn(arg, ev); if (ev.sched_type != SSO_TT_EMPTY) - otx2_ssogws_swtag_flush(ws); + otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op); rte_mb(); aq_cnt = otx2_read64(base + SSO_LF_GGRP_AQ_CNT); ds_cnt = otx2_read64(base + SSO_LF_GGRP_MISC_CNT); diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h index 0a7d6671c..2b716c042 100644 --- a/drivers/event/octeontx2/otx2_worker.h +++ b/drivers/event/octeontx2/otx2_worker.h @@ -64,8 +64,6 @@ otx2_ssogws_get_work(struct otx2_ssogws *ws, struct rte_event *ev, event.get_work0 = (event.get_work0 & (0x3ull << 
32)) << 6 | (event.get_work0 & (0x3FFull << 36)) << 4 | (event.get_work0 & 0xffffffff); - ws->cur_tt = event.sched_type; - ws->cur_grp = event.queue_id; if (event.sched_type != SSO_TT_EMPTY) { if ((flags & NIX_RX_OFFLOAD_SECURITY_F) && @@ -136,8 +134,6 @@ otx2_ssogws_get_work_empty(struct otx2_ssogws *ws, struct rte_event *ev, event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 | (event.get_work0 & (0x3FFull << 36)) << 4 | (event.get_work0 & 0xffffffff); - ws->cur_tt = event.sched_type; - ws->cur_grp = event.queue_id; if (event.sched_type != SSO_TT_EMPTY && event.event_type == RTE_EVENT_TYPE_ETHDEV) { @@ -192,18 +188,14 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws) { otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) + SSOW_LF_GWS_OP_SWTAG_UNTAG); - ws->cur_tt = SSO_SYNC_UNTAGGED; } static __rte_always_inline void -otx2_ssogws_swtag_flush(struct otx2_ssogws *ws) +otx2_ssogws_swtag_flush(uint64_t tag_op, uint64_t flush_op) { - if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) { - ws->cur_tt = SSO_SYNC_EMPTY; + if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(tag_op)) == SSO_TT_EMPTY) return; - } - otx2_write64(0, ws->swtag_flush_op); - ws->cur_tt = SSO_SYNC_EMPTY; + otx2_write64(0, flush_op); } static __rte_always_inline void @@ -236,7 +228,7 @@ otx2_ssogws_swtag_wait(struct otx2_ssogws *ws) } static __rte_always_inline void -otx2_ssogws_head_wait(struct otx2_ssogws *ws) +otx2_ssogws_head_wait(uint64_t tag_op) { #ifdef RTE_ARCH_ARM64 uint64_t tag; @@ -250,11 +242,11 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws) " tbz %[tag], 35, rty%= \n" "done%=: \n" : [tag] "=&r" (tag) - : [tag_op] "r" (ws->tag_op) + : [tag_op] "r" (tag_op) ); #else /* Wait for the HEAD to be set */ - while (!(otx2_read64(ws->tag_op) & BIT_ULL(35))) + while (!(otx2_read64(tag_op) & BIT_ULL(35))) ; #endif } @@ -276,8 +268,7 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m, } static __rte_always_inline uint16_t -otx2_ssogws_event_tx(struct 
otx2_ssogws *ws, struct rte_event *ev, - uint64_t *cmd, +otx2_ssogws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], const uint32_t flags) { @@ -288,7 +279,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, if ((flags & NIX_TX_OFFLOAD_SECURITY_F) && (m->ol_flags & PKT_TX_SEC_OFFLOAD)) { txq = otx2_ssogws_xtract_meta(m, txq_data); - return otx2_sec_event_tx(ws, ev, m, txq, flags); + return otx2_sec_event_tx(base, ev, m, txq, flags); } /* Perform header writes before barrier for TSO */ @@ -309,7 +300,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, m->ol_flags, segdw, flags); if (!ev->sched_type) { otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); - otx2_ssogws_head_wait(ws); + otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG); if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, segdw); @@ -324,7 +315,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, if (!ev->sched_type) { otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); - otx2_ssogws_head_wait(ws); + otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG); if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags); @@ -339,7 +330,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, return 1; } - otx2_ssogws_swtag_flush(ws); + otx2_ssogws_swtag_flush(base + SSOW_LF_GWS_TAG, + base + SSOW_LF_GWS_OP_SWTAG_FLUSH); return 1; } diff --git a/drivers/event/octeontx2/otx2_worker_dual.c b/drivers/event/octeontx2/otx2_worker_dual.c index 946488eab..81af4ca90 100644 --- a/drivers/event/octeontx2/otx2_worker_dual.c +++ b/drivers/event/octeontx2/otx2_worker_dual.c @@ -26,9 +26,9 @@ static __rte_always_inline void otx2_ssogws_dual_fwd_swtag(struct otx2_ssogws_state *ws, const struct rte_event *ev) { + const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)); const uint32_t 
tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - const uint8_t cur_tt = ws->cur_tt; /* 96XX model * cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED @@ -66,15 +66,15 @@ otx2_ssogws_dual_forward_event(struct otx2_ssogws_dual *ws, const uint8_t grp = ev->queue_id; /* Group hasn't changed, Use SWTAG to forward the event */ - if (vws->cur_grp == grp) { + if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(vws->tag_op)) == grp) { otx2_ssogws_dual_fwd_swtag(vws, ev); ws->swtag_req = 1; } else { - /* - * Group has been changed for group based work pipelining, - * Use deschedule/add_work operation to transfer the event to - * new group/core - */ + /* + * Group has been changed for group based work pipelining, + * Use deschedule/add_work operation to transfer the event to + * new group/core + */ otx2_ssogws_dual_fwd_group(vws, ev, grp); } } @@ -93,7 +93,7 @@ otx2_ssogws_dual_enq(void *port, const struct rte_event *ev) otx2_ssogws_dual_forward_event(ws, vws, ev); break; case RTE_EVENT_OP_RELEASE: - otx2_ssogws_swtag_flush((struct otx2_ssogws *)vws); + otx2_ssogws_swtag_flush(vws->tag_op, vws->swtag_flush_op); break; default: return 0; @@ -314,15 +314,13 @@ otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port, \ uint16_t nb_events) \ { \ struct otx2_ssogws_dual *ws = port; \ - struct otx2_ssogws *vws = \ - (struct otx2_ssogws *)&ws->ws_state[!ws->vws]; \ uint64_t cmd[sz]; \ \ RTE_SET_USED(nb_events); \ - return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t \ - (*)[RTE_MAX_QUEUES_PER_PORT]) \ - ws->tx_adptr_data, \ - flags); \ + return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0], \ + cmd, (const uint64_t \ + (*)[RTE_MAX_QUEUES_PER_PORT]) \ + &ws->tx_adptr_data, flags); \ } SSO_TX_ADPTR_ENQ_FASTPATH_FUNC #undef T @@ -333,16 +331,15 @@ otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port, \ struct rte_event ev[], \ uint16_t nb_events) \ { \ - struct otx2_ssogws_dual *ws = port; \ - struct otx2_ssogws *vws = \ - (struct otx2_ssogws 
*)&ws->ws_state[!ws->vws]; \ uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct otx2_ssogws_dual *ws = port; \ \ RTE_SET_USED(nb_events); \ - return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t \ - (*)[RTE_MAX_QUEUES_PER_PORT]) \ - ws->tx_adptr_data, \ - (flags) | NIX_TX_MULTI_SEG_F); \ + return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0], \ + cmd, (const uint64_t \ + (*)[RTE_MAX_QUEUES_PER_PORT]) \ + &ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F);\ } SSO_TX_ADPTR_ENQ_FASTPATH_FUNC #undef T diff --git a/drivers/event/octeontx2/otx2_worker_dual.h b/drivers/event/octeontx2/otx2_worker_dual.h index 6e6061821..72b616439 100644 --- a/drivers/event/octeontx2/otx2_worker_dual.h +++ b/drivers/event/octeontx2/otx2_worker_dual.h @@ -61,8 +61,6 @@ otx2_ssogws_dual_get_work(struct otx2_ssogws_state *ws, event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 | (event.get_work0 & (0x3FFull << 36)) << 4 | (event.get_work0 & 0xffffffff); - ws->cur_tt = event.sched_type; - ws->cur_grp = event.queue_id; if (event.sched_type != SSO_TT_EMPTY) { if ((flags & NIX_RX_OFFLOAD_SECURITY_F) && diff --git a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h index 284bcd536..c8eae3d62 100644 --- a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h +++ b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h @@ -30,12 +30,11 @@ otx2_ipsec_fp_out_rlen_get(struct otx2_sec_session_ipsec_ip *sess, } static __rte_always_inline void -otx2_ssogws_head_wait(struct otx2_ssogws *ws); +otx2_ssogws_head_wait(uint64_t base); static __rte_always_inline int -otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, - struct rte_mbuf *m, const struct otx2_eth_txq *txq, - const uint32_t offload_flags) +otx2_sec_event_tx(uint64_t base, struct rte_event *ev, struct rte_mbuf *m, + const struct otx2_eth_txq *txq, const uint32_t offload_flags) { uint32_t dlen, rlen, desc_headroom, extend_head, extend_tail; struct otx2_sec_session_ipsec_ip *sess; @@ -149,7 +148,7 @@ 
otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, __mempool_check_cookies(m->pool, (void **)&m, 1, 0); if (!ev->sched_type) - otx2_ssogws_head_wait(ws); + otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG); inst.param1 = sess->esn_hi >> 16; inst.param2 = sess->esn_hi & 0xffff; -- 2.17.1
On Mon, Nov 23, 2020 at 2:48 AM <pbhagavatula@marvell.com> wrote: > > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > Enhance Tx path cache locality, remove current tag type and group > stores from datapath to conserve store buffers. > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Applied to dpdk-next-net-eventdev/for-main. Thanks > drivers/event/octeontx2/otx2_evdev.c | 15 ++---- > drivers/event/octeontx2/otx2_evdev.h | 24 ++++----- > drivers/event/octeontx2/otx2_worker.c | 42 +++++---------- > drivers/event/octeontx2/otx2_worker.h | 32 +++++------ > drivers/event/octeontx2/otx2_worker_dual.c | 63 ++++++++++------------ > drivers/event/octeontx2/otx2_worker_dual.h | 2 - > drivers/net/octeontx2/otx2_ethdev_sec_tx.h | 9 ++-- > 7 files changed, 74 insertions(+), 113 deletions(-) > > diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c > index 0fe014c24..14f16a68f 100644 > --- a/drivers/event/octeontx2/otx2_evdev.c > +++ b/drivers/event/octeontx2/otx2_evdev.c > @@ -833,10 +833,12 @@ sso_configure_dual_ports(const struct rte_eventdev *event_dev) > ws->port = i; > base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12); > sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[0], base); > + ws->base[0] = base; > vws++; > > base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12); > sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[1], base); > + ws->base[1] = base; > vws++; > > gws_cookie = ssogws_get_cookie(ws); > @@ -909,6 +911,7 @@ sso_configure_ports(const struct rte_eventdev *event_dev) > ws->port = i; > base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | i << 12); > sso_set_port_ops(ws, base); > + ws->base = base; > > gws_cookie = ssogws_get_cookie(ws); > gws_cookie->event_dev = event_dev; > @@ -1447,20 +1450,12 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable) > ws = event_dev->data->ports[i]; > ssogws_reset((struct otx2_ssogws *)&ws->ws_state[0]); > ssogws_reset((struct otx2_ssogws 
*)&ws->ws_state[1]); > - ws->swtag_req = 0; > ws->vws = 0; > - ws->ws_state[0].cur_grp = 0; > - ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY; > - ws->ws_state[1].cur_grp = 0; > - ws->ws_state[1].cur_tt = SSO_SYNC_EMPTY; > } else { > struct otx2_ssogws *ws; > > ws = event_dev->data->ports[i]; > ssogws_reset(ws); > - ws->swtag_req = 0; > - ws->cur_grp = 0; > - ws->cur_tt = SSO_SYNC_EMPTY; > } > } > > @@ -1479,8 +1474,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable) > otx2_write64(enable, ws->grps_base[i] + > SSO_LF_GGRP_QCTL); > } > - ws->ws_state[0].cur_grp = 0; > - ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY; > } else { > struct otx2_ssogws *ws = event_dev->data->ports[0]; > > @@ -1492,8 +1485,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable) > otx2_write64(enable, ws->grps_base[i] + > SSO_LF_GGRP_QCTL); > } > - ws->cur_grp = 0; > - ws->cur_tt = SSO_SYNC_EMPTY; > } > > /* reset SSO GWS cache */ > diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h > index 0513cb81c..e381b9e52 100644 > --- a/drivers/event/octeontx2/otx2_evdev.h > +++ b/drivers/event/octeontx2/otx2_evdev.h > @@ -80,6 +80,8 @@ > > #define OTX2_SSOW_GET_BASE_ADDR(_GW) ((_GW) - SSOW_LF_GWS_OP_GET_WORK) > #define OTX2_SSOW_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) > +#define OTX2_SSOW_GRP_FROM_TAG(x) (((x) >> 36) & 0x3ff) > +#define OTX2_SSOW_SWTAG_PEND(x) ((x) & BIT_ULL(62)) > > #define NSEC2USEC(__ns) ((__ns) / 1E3) > #define USEC2NSEC(__us) ((__us) * 1E3) > @@ -169,25 +171,23 @@ struct otx2_sso_evdev { > uintptr_t wqp_op; \ > uintptr_t swtag_flush_op; \ > uintptr_t swtag_norm_op; \ > - uintptr_t swtag_desched_op; \ > - uint8_t cur_tt; \ > - uint8_t cur_grp > + uintptr_t swtag_desched_op; > > /* Event port aka GWS */ > struct otx2_ssogws { > /* Get Work Fastpath data */ > OTX2_SSOGWS_OPS; > - uint8_t swtag_req; > + /* PTP timestamp */ > + struct otx2_timesync_info *tstamp; > void *lookup_mem; > uint8_t port; > /* Add Work Fastpath data */ > 
uint64_t xaq_lmt __rte_cache_aligned; > uint64_t *fc_mem; > uintptr_t grps_base[OTX2_SSO_MAX_VHGRP]; > - /* PTP timestamp */ > - struct otx2_timesync_info *tstamp; > /* Tx Fastpath data */ > - uint8_t tx_adptr_data[] __rte_cache_aligned; > + uint64_t base __rte_cache_aligned; > + uint8_t tx_adptr_data[]; > } __rte_cache_aligned; > > struct otx2_ssogws_state { > @@ -197,18 +197,18 @@ struct otx2_ssogws_state { > struct otx2_ssogws_dual { > /* Get Work Fastpath data */ > struct otx2_ssogws_state ws_state[2]; /* Ping and Pong */ > - uint8_t swtag_req; > - uint8_t vws; /* Ping pong bit */ > + /* PTP timestamp */ > + struct otx2_timesync_info *tstamp; > void *lookup_mem; > + uint8_t vws; /* Ping pong bit */ > uint8_t port; > /* Add Work Fastpath data */ > uint64_t xaq_lmt __rte_cache_aligned; > uint64_t *fc_mem; > uintptr_t grps_base[OTX2_SSO_MAX_VHGRP]; > - /* PTP timestamp */ > - struct otx2_timesync_info *tstamp; > /* Tx Fastpath data */ > - uint8_t tx_adptr_data[] __rte_cache_aligned; > + uint64_t base[2] __rte_cache_aligned; > + uint8_t tx_adptr_data[]; > } __rte_cache_aligned; > > static inline struct otx2_sso_evdev * > diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c > index b098407e0..7ed836c1e 100644 > --- a/drivers/event/octeontx2/otx2_worker.c > +++ b/drivers/event/octeontx2/otx2_worker.c > @@ -25,7 +25,7 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev) > { > const uint32_t tag = (uint32_t)ev->event; > const uint8_t new_tt = ev->sched_type; > - const uint8_t cur_tt = ws->cur_tt; > + const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)); > > /* 96XX model > * cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED > @@ -41,8 +41,6 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev) > } else { > otx2_ssogws_swtag_norm(ws, tag, new_tt); > } > - > - ws->swtag_req = 1; > } > > static __rte_always_inline void > @@ -64,7 +62,7 @@ 
otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev) > const uint8_t grp = ev->queue_id; > > /* Group hasn't changed, Use SWTAG to forward the event */ > - if (ws->cur_grp == grp) > + if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(ws->tag_op)) == grp) > otx2_ssogws_fwd_swtag(ws, ev); > else > /* > @@ -75,12 +73,6 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev) > otx2_ssogws_fwd_group(ws, ev, grp); > } > > -static __rte_always_inline void > -otx2_ssogws_release_event(struct otx2_ssogws *ws) > -{ > - otx2_ssogws_swtag_flush(ws); > -} > - > #define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ > uint16_t __rte_hot \ > otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev, \ > @@ -90,8 +82,7 @@ otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev, \ > \ > RTE_SET_USED(timeout_ticks); \ > \ > - if (ws->swtag_req) { \ > - ws->swtag_req = 0; \ > + if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \ > otx2_ssogws_swtag_wait(ws); \ > return 1; \ > } \ > @@ -117,8 +108,7 @@ otx2_ssogws_deq_timeout_ ##name(void *port, struct rte_event *ev, \ > uint16_t ret = 1; \ > uint64_t iter; \ > \ > - if (ws->swtag_req) { \ > - ws->swtag_req = 0; \ > + if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \ > otx2_ssogws_swtag_wait(ws); \ > return ret; \ > } \ > @@ -149,8 +139,7 @@ otx2_ssogws_deq_seg_ ##name(void *port, struct rte_event *ev, \ > \ > RTE_SET_USED(timeout_ticks); \ > \ > - if (ws->swtag_req) { \ > - ws->swtag_req = 0; \ > + if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \ > otx2_ssogws_swtag_wait(ws); \ > return 1; \ > } \ > @@ -177,8 +166,7 @@ otx2_ssogws_deq_seg_timeout_ ##name(void *port, struct rte_event *ev, \ > uint16_t ret = 1; \ > uint64_t iter; \ > \ > - if (ws->swtag_req) { \ > - ws->swtag_req = 0; \ > + if (OTX2_SSOW_SWTAG_PEND(otx2_read64(ws->tag_op))) { \ > otx2_ssogws_swtag_wait(ws); \ > return ret; \ > } \ > @@ -221,7 +209,7 @@ otx2_ssogws_enq(void *port, const struct rte_event *ev) > 
otx2_ssogws_forward_event(ws, ev); > break; > case RTE_EVENT_OP_RELEASE: > - otx2_ssogws_release_event(ws); > + otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op); > break; > default: > return 0; > @@ -274,14 +262,13 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[], \ > { \ > struct otx2_ssogws *ws = port; \ > uint64_t cmd[sz]; \ > - int i; \ > \ > - for (i = 0; i < nb_events; i++) \ > - otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \ > + RTE_SET_USED(nb_events); \ > + return otx2_ssogws_event_tx(ws->base, &ev[0], cmd, \ > + (const uint64_t \ > (*)[RTE_MAX_QUEUES_PER_PORT]) \ > &ws->tx_adptr_data, \ > flags); \ > - return nb_events; \ > } > SSO_TX_ADPTR_ENQ_FASTPATH_FUNC > #undef T > @@ -293,14 +280,13 @@ otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\ > { \ > uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \ > struct otx2_ssogws *ws = port; \ > - int i; \ > \ > - for (i = 0; i < nb_events; i++) \ > - otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \ > + RTE_SET_USED(nb_events); \ > + return otx2_ssogws_event_tx(ws->base, &ev[0], cmd, \ > + (const uint64_t \ > (*)[RTE_MAX_QUEUES_PER_PORT]) \ > &ws->tx_adptr_data, \ > (flags) | NIX_TX_MULTI_SEG_F); \ > - return nb_events; \ > } > SSO_TX_ADPTR_ENQ_FASTPATH_FUNC > #undef T > @@ -335,7 +321,7 @@ ssogws_flush_events(struct otx2_ssogws *ws, uint8_t queue_id, uintptr_t base, > if (fn != NULL && ev.u64 != 0) > fn(arg, ev); > if (ev.sched_type != SSO_TT_EMPTY) > - otx2_ssogws_swtag_flush(ws); > + otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op); > rte_mb(); > aq_cnt = otx2_read64(base + SSO_LF_GGRP_AQ_CNT); > ds_cnt = otx2_read64(base + SSO_LF_GGRP_MISC_CNT); > diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h > index 0a7d6671c..2b716c042 100644 > --- a/drivers/event/octeontx2/otx2_worker.h > +++ b/drivers/event/octeontx2/otx2_worker.h > @@ -64,8 +64,6 @@ otx2_ssogws_get_work(struct otx2_ssogws *ws, struct 
rte_event *ev, > event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 | > (event.get_work0 & (0x3FFull << 36)) << 4 | > (event.get_work0 & 0xffffffff); > - ws->cur_tt = event.sched_type; > - ws->cur_grp = event.queue_id; > > if (event.sched_type != SSO_TT_EMPTY) { > if ((flags & NIX_RX_OFFLOAD_SECURITY_F) && > @@ -136,8 +134,6 @@ otx2_ssogws_get_work_empty(struct otx2_ssogws *ws, struct rte_event *ev, > event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 | > (event.get_work0 & (0x3FFull << 36)) << 4 | > (event.get_work0 & 0xffffffff); > - ws->cur_tt = event.sched_type; > - ws->cur_grp = event.queue_id; > > if (event.sched_type != SSO_TT_EMPTY && > event.event_type == RTE_EVENT_TYPE_ETHDEV) { > @@ -192,18 +188,14 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws) > { > otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) + > SSOW_LF_GWS_OP_SWTAG_UNTAG); > - ws->cur_tt = SSO_SYNC_UNTAGGED; > } > > static __rte_always_inline void > -otx2_ssogws_swtag_flush(struct otx2_ssogws *ws) > +otx2_ssogws_swtag_flush(uint64_t tag_op, uint64_t flush_op) > { > - if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) { > - ws->cur_tt = SSO_SYNC_EMPTY; > + if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(tag_op)) == SSO_TT_EMPTY) > return; > - } > - otx2_write64(0, ws->swtag_flush_op); > - ws->cur_tt = SSO_SYNC_EMPTY; > + otx2_write64(0, flush_op); > } > > static __rte_always_inline void > @@ -236,7 +228,7 @@ otx2_ssogws_swtag_wait(struct otx2_ssogws *ws) > } > > static __rte_always_inline void > -otx2_ssogws_head_wait(struct otx2_ssogws *ws) > +otx2_ssogws_head_wait(uint64_t tag_op) > { > #ifdef RTE_ARCH_ARM64 > uint64_t tag; > @@ -250,11 +242,11 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws) > " tbz %[tag], 35, rty%= \n" > "done%=: \n" > : [tag] "=&r" (tag) > - : [tag_op] "r" (ws->tag_op) > + : [tag_op] "r" (tag_op) > ); > #else > /* Wait for the HEAD to be set */ > - while (!(otx2_read64(ws->tag_op) & BIT_ULL(35))) > + while (!(otx2_read64(tag_op) & BIT_ULL(35))) 
> ; > #endif > } > @@ -276,8 +268,7 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m, > } > > static __rte_always_inline uint16_t > -otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, > - uint64_t *cmd, > +otx2_ssogws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, > const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], > const uint32_t flags) > { > @@ -288,7 +279,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, > if ((flags & NIX_TX_OFFLOAD_SECURITY_F) && > (m->ol_flags & PKT_TX_SEC_OFFLOAD)) { > txq = otx2_ssogws_xtract_meta(m, txq_data); > - return otx2_sec_event_tx(ws, ev, m, txq, flags); > + return otx2_sec_event_tx(base, ev, m, txq, flags); > } > > /* Perform header writes before barrier for TSO */ > @@ -309,7 +300,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, > m->ol_flags, segdw, flags); > if (!ev->sched_type) { > otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); > - otx2_ssogws_head_wait(ws); > + otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG); > if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) > otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, > txq->io_addr, segdw); > @@ -324,7 +315,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, > > if (!ev->sched_type) { > otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); > - otx2_ssogws_head_wait(ws); > + otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG); > if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) > otx2_nix_xmit_one(cmd, txq->lmt_addr, > txq->io_addr, flags); > @@ -339,7 +330,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, > return 1; > } > > - otx2_ssogws_swtag_flush(ws); > + otx2_ssogws_swtag_flush(base + SSOW_LF_GWS_TAG, > + base + SSOW_LF_GWS_OP_SWTAG_FLUSH); > > return 1; > } > diff --git a/drivers/event/octeontx2/otx2_worker_dual.c b/drivers/event/octeontx2/otx2_worker_dual.c > index 946488eab..820455788 100644 > --- 
a/drivers/event/octeontx2/otx2_worker_dual.c > +++ b/drivers/event/octeontx2/otx2_worker_dual.c > @@ -26,9 +26,9 @@ static __rte_always_inline void > otx2_ssogws_dual_fwd_swtag(struct otx2_ssogws_state *ws, > const struct rte_event *ev) > { > + const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)); > const uint32_t tag = (uint32_t)ev->event; > const uint8_t new_tt = ev->sched_type; > - const uint8_t cur_tt = ws->cur_tt; > > /* 96XX model > * cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED > @@ -59,22 +59,20 @@ otx2_ssogws_dual_fwd_group(struct otx2_ssogws_state *ws, > } > > static __rte_always_inline void > -otx2_ssogws_dual_forward_event(struct otx2_ssogws_dual *ws, > - struct otx2_ssogws_state *vws, > +otx2_ssogws_dual_forward_event(struct otx2_ssogws_state *vws, > const struct rte_event *ev) > { > const uint8_t grp = ev->queue_id; > > /* Group hasn't changed, Use SWTAG to forward the event */ > - if (vws->cur_grp == grp) { > + if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(vws->tag_op)) == grp) { > otx2_ssogws_dual_fwd_swtag(vws, ev); > - ws->swtag_req = 1; > } else { > - /* > - * Group has been changed for group based work pipelining, > - * Use deschedule/add_work operation to transfer the event to > - * new group/core > - */ > + /* > + * Group has been changed for group based work pipelining, > + * Use deschedule/add_work operation to transfer the event to > + * new group/core > + */ > otx2_ssogws_dual_fwd_group(vws, ev, grp); > } > } > @@ -90,10 +88,10 @@ otx2_ssogws_dual_enq(void *port, const struct rte_event *ev) > rte_smp_mb(); > return otx2_ssogws_dual_new_event(ws, ev); > case RTE_EVENT_OP_FORWARD: > - otx2_ssogws_dual_forward_event(ws, vws, ev); > + otx2_ssogws_dual_forward_event(vws, ev); > break; > case RTE_EVENT_OP_RELEASE: > - otx2_ssogws_swtag_flush((struct otx2_ssogws *)vws); > + otx2_ssogws_swtag_flush(vws->tag_op, vws->swtag_flush_op); > break; > default: > return 0; > @@ -135,7 +133,7 @@ 
otx2_ssogws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], > struct otx2_ssogws_state *vws = &ws->ws_state[!ws->vws]; > > RTE_SET_USED(nb_events); > - otx2_ssogws_dual_forward_event(ws, vws, ev); > + otx2_ssogws_dual_forward_event(vws, ev); > > return 1; > } > @@ -150,10 +148,10 @@ otx2_ssogws_dual_deq_ ##name(void *port, struct rte_event *ev, \ > \ > rte_prefetch_non_temporal(ws); \ > RTE_SET_USED(timeout_ticks); \ > - if (ws->swtag_req) { \ > + if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \ > + ws->ws_state[!ws->vws].tag_op))) { \ > otx2_ssogws_swtag_wait((struct otx2_ssogws *) \ > &ws->ws_state[!ws->vws]); \ > - ws->swtag_req = 0; \ > return 1; \ > } \ > \ > @@ -184,10 +182,10 @@ otx2_ssogws_dual_deq_timeout_ ##name(void *port, struct rte_event *ev, \ > uint64_t iter; \ > uint8_t gw; \ > \ > - if (ws->swtag_req) { \ > + if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \ > + ws->ws_state[!ws->vws].tag_op))) { \ > otx2_ssogws_swtag_wait((struct otx2_ssogws *) \ > &ws->ws_state[!ws->vws]); \ > - ws->swtag_req = 0; \ > return 1; \ > } \ > \ > @@ -228,10 +226,10 @@ otx2_ssogws_dual_deq_seg_ ##name(void *port, struct rte_event *ev, \ > uint8_t gw; \ > \ > RTE_SET_USED(timeout_ticks); \ > - if (ws->swtag_req) { \ > + if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \ > + ws->ws_state[!ws->vws].tag_op))) { \ > otx2_ssogws_swtag_wait((struct otx2_ssogws *) \ > &ws->ws_state[!ws->vws]); \ > - ws->swtag_req = 0; \ > return 1; \ > } \ > \ > @@ -266,10 +264,10 @@ otx2_ssogws_dual_deq_seg_timeout_ ##name(void *port, \ > uint64_t iter; \ > uint8_t gw; \ > \ > - if (ws->swtag_req) { \ > + if (OTX2_SSOW_SWTAG_PEND(otx2_read64( \ > + ws->ws_state[!ws->vws].tag_op))) { \ > otx2_ssogws_swtag_wait((struct otx2_ssogws *) \ > &ws->ws_state[!ws->vws]); \ > - ws->swtag_req = 0; \ > return 1; \ > } \ > \ > @@ -314,15 +312,13 @@ otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port, \ > uint16_t nb_events) \ > { \ > struct otx2_ssogws_dual *ws = port; \ > - struct otx2_ssogws *vws = \ > - (struct otx2_ssogws 
*)&ws->ws_state[!ws->vws]; \ > uint64_t cmd[sz]; \ > \ > RTE_SET_USED(nb_events); \ > - return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t \ > - (*)[RTE_MAX_QUEUES_PER_PORT]) \ > - ws->tx_adptr_data, \ > - flags); \ > + return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0], \ > + cmd, (const uint64_t \ > + (*)[RTE_MAX_QUEUES_PER_PORT]) \ > + &ws->tx_adptr_data, flags); \ > } > SSO_TX_ADPTR_ENQ_FASTPATH_FUNC > #undef T > @@ -333,16 +329,15 @@ otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port, \ > struct rte_event ev[], \ > uint16_t nb_events) \ > { \ > - struct otx2_ssogws_dual *ws = port; \ > - struct otx2_ssogws *vws = \ > - (struct otx2_ssogws *)&ws->ws_state[!ws->vws]; \ > uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \ > + struct otx2_ssogws_dual *ws = port; \ > \ > RTE_SET_USED(nb_events); \ > - return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t \ > - (*)[RTE_MAX_QUEUES_PER_PORT]) \ > - ws->tx_adptr_data, \ > - (flags) | NIX_TX_MULTI_SEG_F); \ > + return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0], \ > + cmd, (const uint64_t \ > + (*)[RTE_MAX_QUEUES_PER_PORT]) \ > + &ws->tx_adptr_data, \ > + (flags) | NIX_TX_MULTI_SEG_F);\ > } > SSO_TX_ADPTR_ENQ_FASTPATH_FUNC > #undef T > diff --git a/drivers/event/octeontx2/otx2_worker_dual.h b/drivers/event/octeontx2/otx2_worker_dual.h > index 6e6061821..72b616439 100644 > --- a/drivers/event/octeontx2/otx2_worker_dual.h > +++ b/drivers/event/octeontx2/otx2_worker_dual.h > @@ -61,8 +61,6 @@ otx2_ssogws_dual_get_work(struct otx2_ssogws_state *ws, > event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 | > (event.get_work0 & (0x3FFull << 36)) << 4 | > (event.get_work0 & 0xffffffff); > - ws->cur_tt = event.sched_type; > - ws->cur_grp = event.queue_id; > > if (event.sched_type != SSO_TT_EMPTY) { > if ((flags & NIX_RX_OFFLOAD_SECURITY_F) && > diff --git a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h > index 284bcd536..c8eae3d62 100644 > --- 
a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h > +++ b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h > @@ -30,12 +30,11 @@ otx2_ipsec_fp_out_rlen_get(struct otx2_sec_session_ipsec_ip *sess, > } > > static __rte_always_inline void > -otx2_ssogws_head_wait(struct otx2_ssogws *ws); > +otx2_ssogws_head_wait(uint64_t base); > > static __rte_always_inline int > -otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, > - struct rte_mbuf *m, const struct otx2_eth_txq *txq, > - const uint32_t offload_flags) > +otx2_sec_event_tx(uint64_t base, struct rte_event *ev, struct rte_mbuf *m, > + const struct otx2_eth_txq *txq, const uint32_t offload_flags) > { > uint32_t dlen, rlen, desc_headroom, extend_head, extend_tail; > struct otx2_sec_session_ipsec_ip *sess; > @@ -149,7 +148,7 @@ otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, > __mempool_check_cookies(m->pool, (void **)&m, 1, 0); > > if (!ev->sched_type) > - otx2_ssogws_head_wait(ws); > + otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG); > > inst.param1 = sess->esn_hi >> 16; > inst.param2 = sess->esn_hi & 0xffff; > -- > 2.17.1 >