From: Pavan Nikhilesh <pbhagavatula@marvell.com> SSO workslot shouldn't be flushed on Tx adapter enqueue as application might use reference count to re-transmit or forward the packet. Fixes: cb7ee83b6365 ("event/octeontx2: improve single flow performance") Cc: stable@dpdk.org Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/octeontx2/otx2_evdev.h | 1 + drivers/event/octeontx2/otx2_worker.h | 6 ++++-- drivers/event/octeontx2/otx2_worker_dual.c | 8 ++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h index 547e29d4a..e4e444ed7 100644 --- a/drivers/event/octeontx2/otx2_evdev.h +++ b/drivers/event/octeontx2/otx2_evdev.h @@ -79,6 +79,7 @@ #define SSOW_LF_GWS_OP_GWC_INVAL (0xe00ull) #define OTX2_SSOW_GET_BASE_ADDR(_GW) ((_GW) - SSOW_LF_GWS_OP_GET_WORK) +#define OTX2_SSOW_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) #define NSEC2USEC(__ns) ((__ns) / 1E3) #define USEC2NSEC(__us) ((__us) * 1E3) diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h index 3efd3ba97..67893f669 100644 --- a/drivers/event/octeontx2/otx2_worker.h +++ b/drivers/event/octeontx2/otx2_worker.h @@ -198,6 +198,10 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws) static __rte_always_inline void otx2_ssogws_swtag_flush(struct otx2_ssogws *ws) { + if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) { + ws->cur_tt = SSO_SYNC_EMPTY; + return; + } otx2_write64(0, ws->swtag_flush_op); ws->cur_tt = SSO_SYNC_EMPTY; } @@ -329,8 +333,6 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], } } - otx2_write64(0, ws->swtag_flush_op); - return 1; } diff --git a/drivers/event/octeontx2/otx2_worker_dual.c b/drivers/event/octeontx2/otx2_worker_dual.c index 946488eab..f1823e29c 100644 --- a/drivers/event/octeontx2/otx2_worker_dual.c +++ b/drivers/event/octeontx2/otx2_worker_dual.c @@ -157,6 +157,8 @@ otx2_ssogws_dual_deq_ 
##name(void *port, struct rte_event *ev, \ return 1; \ } \ \ + otx2_ssogws_swtag_flush((struct otx2_ssogws *) \ + &ws->ws_state[!ws->vws]); \ gw = otx2_ssogws_dual_get_work(&ws->ws_state[ws->vws], \ &ws->ws_state[!ws->vws], ev, \ flags, ws->lookup_mem, \ @@ -191,6 +193,8 @@ otx2_ssogws_dual_deq_timeout_ ##name(void *port, struct rte_event *ev, \ return 1; \ } \ \ + otx2_ssogws_swtag_flush((struct otx2_ssogws *) \ + &ws->ws_state[!ws->vws]); \ gw = otx2_ssogws_dual_get_work(&ws->ws_state[ws->vws], \ &ws->ws_state[!ws->vws], ev, \ flags, ws->lookup_mem, \ @@ -235,6 +239,8 @@ otx2_ssogws_dual_deq_seg_ ##name(void *port, struct rte_event *ev, \ return 1; \ } \ \ + otx2_ssogws_swtag_flush((struct otx2_ssogws *) \ + &ws->ws_state[!ws->vws]); \ gw = otx2_ssogws_dual_get_work(&ws->ws_state[ws->vws], \ &ws->ws_state[!ws->vws], ev, \ flags | NIX_RX_MULTI_SEG_F, \ @@ -273,6 +279,8 @@ otx2_ssogws_dual_deq_seg_timeout_ ##name(void *port, \ return 1; \ } \ \ + otx2_ssogws_swtag_flush((struct otx2_ssogws *) \ + &ws->ws_state[!ws->vws]); \ gw = otx2_ssogws_dual_get_work(&ws->ws_state[ws->vws], \ &ws->ws_state[!ws->vws], ev, \ flags | NIX_RX_MULTI_SEG_F, \ -- 2.17.1
From: Pavan Nikhilesh <pbhagavatula@marvell.com> Fix unconditional Tx flush, handle packet retransmit cases where flush has to be deferred. Fixes: cb7ee83b6365 ("event/octeontx2: improve single flow performance") Cc: stable@dpdk.org Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/octeontx2/otx2_evdev.h | 1 + drivers/event/octeontx2/otx2_worker.c | 14 +++++++++----- drivers/event/octeontx2/otx2_worker.h | 20 +++++++++++++++----- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h index 547e29d4a..49a865e6f 100644 --- a/drivers/event/octeontx2/otx2_evdev.h +++ b/drivers/event/octeontx2/otx2_evdev.h @@ -79,6 +79,7 @@ #define SSOW_LF_GWS_OP_GWC_INVAL (0xe00ull) #define OTX2_SSOW_GET_BASE_ADDR(_GW) ((_GW) - SSOW_LF_GWS_OP_GET_WORK) +#define OTX2_SSOW_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) #define NSEC2USEC(__ns) ((__ns) / 1E3) #define USEC2NSEC(__us) ((__us) * 1E3) diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c index 1d427e4a3..b098407e0 100644 --- a/drivers/event/octeontx2/otx2_worker.c +++ b/drivers/event/octeontx2/otx2_worker.c @@ -274,12 +274,14 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[], \ { \ struct otx2_ssogws *ws = port; \ uint64_t cmd[sz]; \ + int i; \ \ - RTE_SET_USED(nb_events); \ - return otx2_ssogws_event_tx(ws, ev, cmd, (const uint64_t \ + for (i = 0; i < nb_events; i++) \ + otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \ (*)[RTE_MAX_QUEUES_PER_PORT]) \ &ws->tx_adptr_data, \ flags); \ + return nb_events; \ } SSO_TX_ADPTR_ENQ_FASTPATH_FUNC #undef T @@ -289,14 +291,16 @@ uint16_t __rte_hot \ otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\ uint16_t nb_events) \ { \ - struct otx2_ssogws *ws = port; \ uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct otx2_ssogws *ws = port; \ + int i; \ \ - RTE_SET_USED(nb_events); \ - return 
otx2_ssogws_event_tx(ws, ev, cmd, (const uint64_t \ + for (i = 0; i < nb_events; i++) \ + otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \ (*)[RTE_MAX_QUEUES_PER_PORT]) \ &ws->tx_adptr_data, \ (flags) | NIX_TX_MULTI_SEG_F); \ + return nb_events; \ } SSO_TX_ADPTR_ENQ_FASTPATH_FUNC #undef T diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h index 3efd3ba97..0a7d6671c 100644 --- a/drivers/event/octeontx2/otx2_worker.h +++ b/drivers/event/octeontx2/otx2_worker.h @@ -198,6 +198,10 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws) static __rte_always_inline void otx2_ssogws_swtag_flush(struct otx2_ssogws *ws) { + if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) { + ws->cur_tt = SSO_SYNC_EMPTY; + return; + } otx2_write64(0, ws->swtag_flush_op); ws->cur_tt = SSO_SYNC_EMPTY; } @@ -272,13 +276,14 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m, } static __rte_always_inline uint16_t -otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], - uint64_t *cmd, const uint64_t - txq_data[][RTE_MAX_QUEUES_PER_PORT], +otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, + uint64_t *cmd, + const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], const uint32_t flags) { - struct rte_mbuf *m = ev[0].mbuf; + struct rte_mbuf *m = ev->mbuf; const struct otx2_eth_txq *txq; + uint16_t ref_cnt = m->refcnt; if ((flags & NIX_TX_OFFLOAD_SECURITY_F) && (m->ol_flags & PKT_TX_SEC_OFFLOAD)) { @@ -329,7 +334,12 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], } } - otx2_write64(0, ws->swtag_flush_op); + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { + if (ref_cnt > 1) + return 1; + } + + otx2_ssogws_swtag_flush(ws); return 1; } -- 2.17.1
On Fri, Nov 20, 2020 at 4:12 PM <pbhagavatula@marvell.com> wrote: > > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > Fix unconditional Tx flush, handle packet retransmit cases where > flush has to be differed. > > Fixes: cb7ee83b6365 ("event/octeontx2: improve single flow performance") > Cc: stable@dpdk.org > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Reworded the git commit message to: event/octeontx2: fix unconditional Tx flush Fix unconditional Tx flush, in case of Tx only we need to check if work slot is non-empty before issuing flush. Also, in packet retransmit cases added check for the reference count and flush the work slot only for the last packet. Fixes: cb7ee83b6365 ("event/octeontx2: improve single flow performance") Cc: stable@dpdk.org Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Applied to dpdk-next-net-eventdev/for-main. Thanks > --- > drivers/event/octeontx2/otx2_evdev.h | 1 + > drivers/event/octeontx2/otx2_worker.c | 14 +++++++++----- > drivers/event/octeontx2/otx2_worker.h | 20 +++++++++++++++----- > 3 files changed, 25 insertions(+), 10 deletions(-) > > diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h > index 547e29d4a..49a865e6f 100644 > --- a/drivers/event/octeontx2/otx2_evdev.h > +++ b/drivers/event/octeontx2/otx2_evdev.h > @@ -79,6 +79,7 @@ > #define SSOW_LF_GWS_OP_GWC_INVAL (0xe00ull) > > #define OTX2_SSOW_GET_BASE_ADDR(_GW) ((_GW) - SSOW_LF_GWS_OP_GET_WORK) > +#define OTX2_SSOW_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) > > #define NSEC2USEC(__ns) ((__ns) / 1E3) > #define USEC2NSEC(__us) ((__us) * 1E3) > diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c > index 1d427e4a3..b098407e0 100644 > --- a/drivers/event/octeontx2/otx2_worker.c > +++ b/drivers/event/octeontx2/otx2_worker.c > @@ -274,12 +274,14 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[], \ > { \ > struct otx2_ssogws *ws = port; \ > uint64_t 
cmd[sz]; \ > + int i; \ > \ > - RTE_SET_USED(nb_events); \ > - return otx2_ssogws_event_tx(ws, ev, cmd, (const uint64_t \ > + for (i = 0; i < nb_events; i++) \ > + otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \ > (*)[RTE_MAX_QUEUES_PER_PORT]) \ > &ws->tx_adptr_data, \ > flags); \ > + return nb_events; \ > } > SSO_TX_ADPTR_ENQ_FASTPATH_FUNC > #undef T > @@ -289,14 +291,16 @@ uint16_t __rte_hot \ > otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\ > uint16_t nb_events) \ > { \ > - struct otx2_ssogws *ws = port; \ > uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \ > + struct otx2_ssogws *ws = port; \ > + int i; \ > \ > - RTE_SET_USED(nb_events); \ > - return otx2_ssogws_event_tx(ws, ev, cmd, (const uint64_t \ > + for (i = 0; i < nb_events; i++) \ > + otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \ > (*)[RTE_MAX_QUEUES_PER_PORT]) \ > &ws->tx_adptr_data, \ > (flags) | NIX_TX_MULTI_SEG_F); \ > + return nb_events; \ > } > SSO_TX_ADPTR_ENQ_FASTPATH_FUNC > #undef T > diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h > index 3efd3ba97..0a7d6671c 100644 > --- a/drivers/event/octeontx2/otx2_worker.h > +++ b/drivers/event/octeontx2/otx2_worker.h > @@ -198,6 +198,10 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws) > static __rte_always_inline void > otx2_ssogws_swtag_flush(struct otx2_ssogws *ws) > { > + if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) { > + ws->cur_tt = SSO_SYNC_EMPTY; > + return; > + } > otx2_write64(0, ws->swtag_flush_op); > ws->cur_tt = SSO_SYNC_EMPTY; > } > @@ -272,13 +276,14 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m, > } > > static __rte_always_inline uint16_t > -otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], > - uint64_t *cmd, const uint64_t > - txq_data[][RTE_MAX_QUEUES_PER_PORT], > +otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, > + uint64_t *cmd, > + const uint64_t 
txq_data[][RTE_MAX_QUEUES_PER_PORT], > const uint32_t flags) > { > - struct rte_mbuf *m = ev[0].mbuf; > + struct rte_mbuf *m = ev->mbuf; > const struct otx2_eth_txq *txq; > + uint16_t ref_cnt = m->refcnt; > > if ((flags & NIX_TX_OFFLOAD_SECURITY_F) && > (m->ol_flags & PKT_TX_SEC_OFFLOAD)) { > @@ -329,7 +334,12 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], > } > } > > - otx2_write64(0, ws->swtag_flush_op); > + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { > + if (ref_cnt > 1) > + return 1; > + } > + > + otx2_ssogws_swtag_flush(ws); > > return 1; > } > -- > 2.17.1 >