* [dpdk-dev] [PATCH 1/4] event/octeontx2: add switch tag flush op @ 2020-09-15 18:56 Harman Kalra 2020-09-15 18:56 ` [dpdk-dev] [PATCH 2/4] event/octeontx2: improve single flow performance Harman Kalra ` (2 more replies) 0 siblings, 3 replies; 14+ messages in thread From: Harman Kalra @ 2020-09-15 18:56 UTC (permalink / raw) To: Pavan Nikhilesh, Jerin Jacob; +Cc: dev From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add SWTAG flush operation at the end of transmit sequence to immediately release the tag held by the core. Reuse Tag address to check SWTAG completion status. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/octeontx2/otx2_evdev.c | 2 +- drivers/event/octeontx2/otx2_evdev.h | 18 +++++++++--------- drivers/event/octeontx2/otx2_worker.h | 27 +++++++++++++-------------- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c index b8b57c388..faf8a5f17 100644 --- a/drivers/event/octeontx2/otx2_evdev.c +++ b/drivers/event/octeontx2/otx2_evdev.c @@ -771,7 +771,7 @@ sso_set_port_ops(struct otx2_ssogws *ws, uintptr_t base) ws->tag_op = base + SSOW_LF_GWS_TAG; ws->wqp_op = base + SSOW_LF_GWS_WQP; ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK; - ws->swtp_op = base + SSOW_LF_GWS_SWTP; + ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH; ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM; ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED; } diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h index 873724dd4..9577d867d 100644 --- a/drivers/event/octeontx2/otx2_evdev.h +++ b/drivers/event/octeontx2/otx2_evdev.h @@ -162,15 +162,15 @@ struct otx2_sso_evdev { struct otx2_timesync_info *tstamp; } __rte_cache_aligned; -#define OTX2_SSOGWS_OPS \ - /* WS ops */ \ - uintptr_t getwrk_op; \ - uintptr_t tag_op; \ - uintptr_t wqp_op; \ - uintptr_t swtp_op; \ - uintptr_t swtag_norm_op; \ - uintptr_t swtag_desched_op; \ - uint8_t cur_tt; \ +#define OTX2_SSOGWS_OPS \ + /* WS ops */ \ + uintptr_t getwrk_op; \ + uintptr_t tag_op; \ + uintptr_t wqp_op; \ + uintptr_t swtag_flush_op; \ + uintptr_t swtag_norm_op; \ + uintptr_t swtag_desched_op; \ + uint8_t cur_tt; \ uint8_t cur_grp /* Event port aka GWS */ diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h index 924ff7ff4..1bf8afedf 100644 --- a/drivers/event/octeontx2/otx2_worker.h +++ b/drivers/event/octeontx2/otx2_worker.h @@ -190,8 +190,7 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws) static __rte_always_inline void otx2_ssogws_swtag_flush(struct otx2_ssogws *ws) { - otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) + - SSOW_LF_GWS_OP_SWTAG_FLUSH); + otx2_write64(0, ws->swtag_flush_op); ws->cur_tt = SSO_SYNC_EMPTY; } @@ -208,20 +207,18 @@ otx2_ssogws_swtag_wait(struct otx2_ssogws *ws) #ifdef RTE_ARCH_ARM64 uint64_t swtp; - asm volatile ( - " ldr %[swtb], [%[swtp_loc]] \n" - " cbz %[swtb], done%= \n" - " sevl \n" - "rty%=: wfe \n" - " ldr %[swtb], [%[swtp_loc]] \n" - " cbnz %[swtb], rty%= \n" - "done%=: \n" - : [swtb] "=&r" (swtp) - : [swtp_loc] "r" (ws->swtp_op) - ); + asm volatile(" ldr %[swtb], [%[swtp_loc]] \n" + " tbz %[swtb], 62, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[swtb], [%[swtp_loc]] \n" + " tbnz %[swtb], 62, rty%= \n" + "done%=: \n" + : [swtb] "=&r" (swtp) + : [swtp_loc] "r" (ws->tag_op)); #else /* Wait for the SWTAG/SWTAG_FULL operation */ - while (otx2_read64(ws->swtp_op)) + while (otx2_read64(ws->tag_op) & BIT_ULL(62)) ; #endif } @@ -309,6 +306,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags); } + otx2_write64(0, ws->swtag_flush_op); + return 1; } -- 2.18.0 ^ permalink raw reply [flat|nested] 14+ messages in thread
* [dpdk-dev] [PATCH 2/4] event/octeontx2: improve single flow performance 2020-09-15 18:56 [dpdk-dev] [PATCH 1/4] event/octeontx2: add switch tag flush op Harman Kalra @ 2020-09-15 18:56 ` Harman Kalra 2020-10-05 9:29 ` Jerin Jacob 2020-09-15 18:56 ` [dpdk-dev] [PATCH 3/4] net/octeontx2: fix jumbo frame crash Harman Kalra 2020-09-15 18:56 ` [dpdk-dev] [PATCH 4/4] app/eventdev: enable fast free offload Harman Kalra 2 siblings, 1 reply; 14+ messages in thread From: Harman Kalra @ 2020-09-15 18:56 UTC (permalink / raw) To: Pavan Nikhilesh, Jerin Jacob, Nithin Dabilpuram, Kiran Kumar K; +Cc: dev From: Pavan Nikhilesh <pbhagavatula@marvell.com> Improve single flow performance by moving the point of coherence to the end of transmit sequence. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/octeontx2/otx2_worker.h | 35 +++++++++++++++++---------- drivers/net/octeontx2/otx2_tx.h | 18 ++++++++++++++ 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h index 1bf8afedf..32d611458 100644 --- a/drivers/event/octeontx2/otx2_worker.h +++ b/drivers/event/octeontx2/otx2_worker.h @@ -247,15 +247,6 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws) #endif } -static __rte_always_inline void -otx2_ssogws_order(struct otx2_ssogws *ws, const uint8_t wait_flag) -{ - if (wait_flag) - otx2_ssogws_head_wait(ws); - - rte_cio_wmb(); -} - static __rte_always_inline const struct otx2_eth_txq * otx2_ssogws_xtract_meta(struct rte_mbuf *m, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) @@ -287,10 +278,9 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], return otx2_sec_event_tx(ws, ev, m, txq, flags); } - rte_prefetch_non_temporal(&txq_data[m->port][0]); /* Perform header writes before barrier for TSO */ otx2_nix_xmit_prepare_tso(m, flags); - otx2_ssogws_order(ws, !ev->sched_type); + rte_cio_wmb(); txq = otx2_ssogws_xtract_meta(m, txq_data); otx2_ssogws_prepare_pkt(txq, m, cmd, flags); @@ -298,12 +288,31 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], const uint16_t segdw = otx2_nix_prepare_mseg(m, cmd, flags); otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0], m->ol_flags, segdw, flags); - otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, segdw); + if (!ev->sched_type) { + otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); + otx2_ssogws_head_wait(ws); + if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) + otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, + txq->io_addr, segdw); + } else { + otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, + segdw); + } } else { /* Passing no of segdw as 4: HDR + EXT + SG + SMEM */ otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0], m->ol_flags, 4, flags); - otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags); + + if (!ev->sched_type) { + otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); + otx2_ssogws_head_wait(ws); + if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) + otx2_nix_xmit_one(cmd, txq->lmt_addr, + txq->io_addr, flags); + } else { + otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, + flags); + } } otx2_write64(0, ws->swtag_flush_op); diff --git a/drivers/net/octeontx2/otx2_tx.h b/drivers/net/octeontx2/otx2_tx.h index 3c4317092..caf170fd1 100644 --- a/drivers/net/octeontx2/otx2_tx.h +++ b/drivers/net/octeontx2/otx2_tx.h @@ -383,6 +383,18 @@ otx2_nix_xmit_one(uint64_t *cmd, void *lmt_addr, } while (lmt_status == 0); } +static __rte_always_inline void +otx2_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags) +{ + otx2_lmt_mov(lmt_addr, cmd, otx2_nix_tx_ext_subs(flags)); +} + +static __rte_always_inline uint64_t +otx2_nix_xmit_submit_lmt(const rte_iova_t io_addr) +{ + return otx2_lmt_submit(io_addr); +} + static __rte_always_inline uint16_t otx2_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) { @@ -453,6 +465,12 @@ otx2_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) return segdw; } +static __rte_always_inline void +otx2_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw) +{ + otx2_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw); +} + static __rte_always_inline void otx2_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, rte_iova_t io_addr, uint16_t segdw) -- 2.18.0 ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [dpdk-dev] [PATCH 2/4] event/octeontx2: improve single flow performance 2020-09-15 18:56 ` [dpdk-dev] [PATCH 2/4] event/octeontx2: improve single flow performance Harman Kalra @ 2020-10-05 9:29 ` Jerin Jacob 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 1/4] event/octeontx2: add switch tag flush op Harman Kalra 0 siblings, 1 reply; 14+ messages in thread From: Jerin Jacob @ 2020-10-05 9:29 UTC (permalink / raw) To: Harman Kalra Cc: Pavan Nikhilesh, Jerin Jacob, Nithin Dabilpuram, Kiran Kumar K, dpdk-dev On Wed, Sep 16, 2020 at 12:27 AM Harman Kalra <hkalra@marvell.com> wrote: > > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > Improve single flow performance by moving the point of coherence > to the end of transmit sequence. > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> > --- > drivers/event/octeontx2/otx2_worker.h | 35 +++++++++++++++++---------- > drivers/net/octeontx2/otx2_tx.h | 18 ++++++++++++++ > 2 files changed, 40 insertions(+), 13 deletions(-) Failed[1] to apply this patch on dpdk-next-eventdev Could you rebase this patch to dpdk-next-eventdev tree and send an update version? [1] [for-main][dpdk-next-eventdev] $ git am -3 /tmp/r/2-4-event-octeontx2-improve-single-flow-performance Applying: event/octeontx2: improve single flow performance error: sha1 information is lacking or useless (drivers/event/octeontx2/otx2_worker.h). error: could not build fake ancestor Patch failed at 0001 event/octeontx2: improve single flow performance hint: Use 'git am --show-current-patch=diff' to see the failed patch When you have resolved this problem, run "git am --continue". If you prefer to skip this patch, run "git am --skip" instead. To restore the original branch and stop patching, run "git am --abort" > > diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h > index 1bf8afedf..32d611458 100644 > --- a/drivers/event/octeontx2/otx2_worker.h > +++ b/drivers/event/octeontx2/otx2_worker.h > @@ -247,15 +247,6 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws) > #endif > } > > -static __rte_always_inline void > -otx2_ssogws_order(struct otx2_ssogws *ws, const uint8_t wait_flag) > -{ > - if (wait_flag) > - otx2_ssogws_head_wait(ws); > - > - rte_cio_wmb(); > -} > - > static __rte_always_inline const struct otx2_eth_txq * > otx2_ssogws_xtract_meta(struct rte_mbuf *m, > const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) > @@ -287,10 +278,9 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], > return otx2_sec_event_tx(ws, ev, m, txq, flags); > } > > - rte_prefetch_non_temporal(&txq_data[m->port][0]); > /* Perform header writes before barrier for TSO */ > otx2_nix_xmit_prepare_tso(m, flags); > - otx2_ssogws_order(ws, !ev->sched_type); > + rte_cio_wmb(); > txq = otx2_ssogws_xtract_meta(m, txq_data); > otx2_ssogws_prepare_pkt(txq, m, cmd, flags); > > @@ -298,12 +288,31 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], > const uint16_t segdw = otx2_nix_prepare_mseg(m, cmd, flags); > otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0], > m->ol_flags, segdw, flags); > - otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, segdw); > + if (!ev->sched_type) { > + otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); > + otx2_ssogws_head_wait(ws); > + if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) > + otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, > + txq->io_addr, segdw); > + } else { > + otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, > + segdw); > + } > } else { > /* Passing no of segdw as 4: HDR + EXT + SG + SMEM */ > otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0], > m->ol_flags, 4, flags); > - otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags); > + > + if (!ev->sched_type) { > + otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); > + otx2_ssogws_head_wait(ws); > + if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) > + otx2_nix_xmit_one(cmd, txq->lmt_addr, > + txq->io_addr, flags); > + } else { > + otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, > + flags); > + } > } > > otx2_write64(0, ws->swtag_flush_op); > diff --git a/drivers/net/octeontx2/otx2_tx.h b/drivers/net/octeontx2/otx2_tx.h > index 3c4317092..caf170fd1 100644 > --- a/drivers/net/octeontx2/otx2_tx.h > +++ b/drivers/net/octeontx2/otx2_tx.h > @@ -383,6 +383,18 @@ otx2_nix_xmit_one(uint64_t *cmd, void *lmt_addr, > } while (lmt_status == 0); > } > > +static __rte_always_inline void > +otx2_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags) > +{ > + otx2_lmt_mov(lmt_addr, cmd, otx2_nix_tx_ext_subs(flags)); > +} > + > +static __rte_always_inline uint64_t > +otx2_nix_xmit_submit_lmt(const rte_iova_t io_addr) > +{ > + return otx2_lmt_submit(io_addr); > +} > + > static __rte_always_inline uint16_t > otx2_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) > { > @@ -453,6 +465,12 @@ otx2_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) > return segdw; > } > > +static __rte_always_inline void > +otx2_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw) > +{ > + otx2_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw); > +} > + > static __rte_always_inline void > otx2_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, > rte_iova_t io_addr, uint16_t segdw) > -- > 2.18.0 > ^ permalink raw reply [flat|nested] 14+ messages in thread
* [dpdk-dev] [PATCH v2 1/4] event/octeontx2: add switch tag flush op 2020-10-05 9:29 ` Jerin Jacob @ 2020-10-08 18:48 ` Harman Kalra 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 2/4] event/octeontx2: improve single flow performance Harman Kalra ` (3 more replies) 0 siblings, 4 replies; 14+ messages in thread From: Harman Kalra @ 2020-10-08 18:48 UTC (permalink / raw) To: Pavan Nikhilesh, Jerin Jacob; +Cc: dev From: Pavan Nikhilesh <pbhagavatula@marvell.com> Add SWTAG flush operation at the end of transmit sequence to immediately release the tag held by the core. Reuse Tag address to check SWTAG completion status. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- drivers/event/octeontx2/otx2_evdev.c | 2 +- drivers/event/octeontx2/otx2_evdev.h | 18 +++++++++--------- drivers/event/octeontx2/otx2_worker.h | 27 +++++++++++++-------------- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c index b8b57c388..faf8a5f17 100644 --- a/drivers/event/octeontx2/otx2_evdev.c +++ b/drivers/event/octeontx2/otx2_evdev.c @@ -771,7 +771,7 @@ sso_set_port_ops(struct otx2_ssogws *ws, uintptr_t base) ws->tag_op = base + SSOW_LF_GWS_TAG; ws->wqp_op = base + SSOW_LF_GWS_WQP; ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK; - ws->swtp_op = base + SSOW_LF_GWS_SWTP; + ws->swtag_flush_op = base + SSOW_LF_GWS_OP_SWTAG_FLUSH; ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM; ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED; } diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h index 873724dd4..9577d867d 100644 --- a/drivers/event/octeontx2/otx2_evdev.h +++ b/drivers/event/octeontx2/otx2_evdev.h @@ -162,15 +162,15 @@ struct otx2_sso_evdev { struct otx2_timesync_info *tstamp; } __rte_cache_aligned; -#define OTX2_SSOGWS_OPS \ - /* WS ops */ \ - uintptr_t getwrk_op; \ - uintptr_t tag_op; \ - uintptr_t wqp_op; \ - uintptr_t swtp_op; \ - uintptr_t swtag_norm_op; \ - uintptr_t swtag_desched_op; \ - uint8_t cur_tt; \ +#define OTX2_SSOGWS_OPS \ + /* WS ops */ \ + uintptr_t getwrk_op; \ + uintptr_t tag_op; \ + uintptr_t wqp_op; \ + uintptr_t swtag_flush_op; \ + uintptr_t swtag_norm_op; \ + uintptr_t swtag_desched_op; \ + uint8_t cur_tt; \ uint8_t cur_grp /* Event port aka GWS */ diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h index cde1288d9..80dfe3e73 100644 --- a/drivers/event/octeontx2/otx2_worker.h +++ b/drivers/event/octeontx2/otx2_worker.h @@ -190,8 +190,7 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws) static __rte_always_inline void otx2_ssogws_swtag_flush(struct otx2_ssogws *ws) { - otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) + - SSOW_LF_GWS_OP_SWTAG_FLUSH); + otx2_write64(0, ws->swtag_flush_op); ws->cur_tt = SSO_SYNC_EMPTY; } @@ -208,20 +207,18 @@ otx2_ssogws_swtag_wait(struct otx2_ssogws *ws) #ifdef RTE_ARCH_ARM64 uint64_t swtp; - asm volatile ( - " ldr %[swtb], [%[swtp_loc]] \n" - " cbz %[swtb], done%= \n" - " sevl \n" - "rty%=: wfe \n" - " ldr %[swtb], [%[swtp_loc]] \n" - " cbnz %[swtb], rty%= \n" - "done%=: \n" - : [swtb] "=&r" (swtp) - : [swtp_loc] "r" (ws->swtp_op) - ); + asm volatile(" ldr %[swtb], [%[swtp_loc]] \n" + " tbz %[swtb], 62, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[swtb], [%[swtp_loc]] \n" + " tbnz %[swtb], 62, rty%= \n" + "done%=: \n" + : [swtb] "=&r" (swtp) + : [swtp_loc] "r" (ws->tag_op)); #else /* Wait for the SWTAG/SWTAG_FULL operation */ - while (otx2_read64(ws->swtp_op)) + while (otx2_read64(ws->tag_op) & BIT_ULL(62)) ; #endif } @@ -309,6 +306,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags); } + otx2_write64(0, ws->swtag_flush_op); + return 1; } -- 2.18.0 ^ permalink raw reply [flat|nested] 14+ messages in thread
* [dpdk-dev] [PATCH v2 2/4] event/octeontx2: improve single flow performance 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 1/4] event/octeontx2: add switch tag flush op Harman Kalra @ 2020-10-08 18:48 ` Harman Kalra 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 3/4] net/octeontx2: fix jumbo frame crash Harman Kalra ` (2 subsequent siblings) 3 siblings, 0 replies; 14+ messages in thread From: Harman Kalra @ 2020-10-08 18:48 UTC (permalink / raw) To: Pavan Nikhilesh, Jerin Jacob, Nithin Dabilpuram, Kiran Kumar K; +Cc: dev From: Pavan Nikhilesh <pbhagavatula@marvell.com> Improve single flow performance by moving the point of coherence to the end of transmit sequence. Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> --- V2: * replace rte_cio_wmb with rte_io_wmb drivers/event/octeontx2/otx2_worker.h | 35 +++++++++++++++++---------- drivers/net/octeontx2/otx2_tx.h | 18 ++++++++++++++ 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h index 80dfe3e73..757fa6fe5 100644 --- a/drivers/event/octeontx2/otx2_worker.h +++ b/drivers/event/octeontx2/otx2_worker.h @@ -247,15 +247,6 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws) #endif } -static __rte_always_inline void -otx2_ssogws_order(struct otx2_ssogws *ws, const uint8_t wait_flag) -{ - if (wait_flag) - otx2_ssogws_head_wait(ws); - - rte_io_wmb(); -} - static __rte_always_inline const struct otx2_eth_txq * otx2_ssogws_xtract_meta(struct rte_mbuf *m, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) @@ -287,10 +278,9 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], return otx2_sec_event_tx(ws, ev, m, txq, flags); } - rte_prefetch_non_temporal(&txq_data[m->port][0]); /* Perform header writes before barrier for TSO */ otx2_nix_xmit_prepare_tso(m, flags); - otx2_ssogws_order(ws, !ev->sched_type); + rte_io_wmb(); txq = otx2_ssogws_xtract_meta(m, txq_data); otx2_ssogws_prepare_pkt(txq, m, cmd, flags); @@ -298,12 +288,31 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], const uint16_t segdw = otx2_nix_prepare_mseg(m, cmd, flags); otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0], m->ol_flags, segdw, flags); - otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, segdw); + if (!ev->sched_type) { + otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); + otx2_ssogws_head_wait(ws); + if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) + otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, + txq->io_addr, segdw); + } else { + otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, + segdw); + } } else { /* Passing no of segdw as 4: HDR + EXT + SG + SMEM */ otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0], m->ol_flags, 4, flags); - otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags); + + if (!ev->sched_type) { + otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); + otx2_ssogws_head_wait(ws); + if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) + otx2_nix_xmit_one(cmd, txq->lmt_addr, + txq->io_addr, flags); + } else { + otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, + flags); + } } otx2_write64(0, ws->swtag_flush_op); diff --git a/drivers/net/octeontx2/otx2_tx.h b/drivers/net/octeontx2/otx2_tx.h index 3c4317092..caf170fd1 100644 --- a/drivers/net/octeontx2/otx2_tx.h +++ b/drivers/net/octeontx2/otx2_tx.h @@ -383,6 +383,18 @@ otx2_nix_xmit_one(uint64_t *cmd, void *lmt_addr, } while (lmt_status == 0); } +static __rte_always_inline void +otx2_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags) +{ + otx2_lmt_mov(lmt_addr, cmd, otx2_nix_tx_ext_subs(flags)); +} + +static __rte_always_inline uint64_t +otx2_nix_xmit_submit_lmt(const rte_iova_t io_addr) +{ + return otx2_lmt_submit(io_addr); +} + static __rte_always_inline uint16_t otx2_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) { @@ -453,6 +465,12 @@ otx2_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) return segdw; } +static __rte_always_inline void +otx2_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw) +{ + otx2_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw); +} + static __rte_always_inline void otx2_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, rte_iova_t io_addr, uint16_t segdw) -- 2.18.0 ^ permalink raw reply [flat|nested] 14+ messages in thread
* [dpdk-dev] [PATCH v2 3/4] net/octeontx2: fix jumbo frame crash 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 1/4] event/octeontx2: add switch tag flush op Harman Kalra 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 2/4] event/octeontx2: improve single flow performance Harman Kalra @ 2020-10-08 18:48 ` Harman Kalra 2020-10-16 4:04 ` [dpdk-dev] [dpdk-stable] " Thomas Monjalon 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 4/4] app/eventdev: enable fast free offload Harman Kalra 2020-10-11 10:40 ` [dpdk-dev] [PATCH v2 1/4] event/octeontx2: add switch tag flush op Jerin Jacob 3 siblings, 1 reply; 14+ messages in thread From: Harman Kalra @ 2020-10-08 18:48 UTC (permalink / raw) To: Jerin Jacob, Nithin Dabilpuram, Pavan Nikhilesh, Kiran Kumar K Cc: dev, Harman Kalra, stable Issue has been observed in case of multi segments where mbuf data gets corrupted due to missing barriers. Changes made to mbuf just before LMTST by one core gets updatded when the same mbuf is in use by another core, leading to corruption. It should be ensured that all changes made to mbuf should be written before LMTST. Fixes: cbd5710db48d ("net/octeontx2: add Tx multi segment version") Cc: stable@dpdk.org Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Harman Kalra <hkalra@marvell.com> --- V2: * replace rte_cio_wmb with rte_io_wmb drivers/common/octeontx2/otx2_io_arm64.h | 12 ++++++++++ drivers/common/octeontx2/otx2_io_generic.h | 16 +++++++++++--- drivers/event/octeontx2/otx2_worker.h | 20 +++++++++++++---- drivers/mempool/octeontx2/otx2_mempool_ops.c | 4 ++++ drivers/net/octeontx2/otx2_tx.c | 23 ++++++++++++++------ drivers/net/octeontx2/otx2_tx.h | 23 ++++++++++++++++++++ 6 files changed, 84 insertions(+), 14 deletions(-) diff --git a/drivers/common/octeontx2/otx2_io_arm64.h b/drivers/common/octeontx2/otx2_io_arm64.h index 7e45329b3..b5c85d9a6 100644 --- a/drivers/common/octeontx2/otx2_io_arm64.h +++ b/drivers/common/octeontx2/otx2_io_arm64.h @@ -63,6 +63,18 @@ otx2_lmt_submit(rte_iova_t io_address) return result; } +static __rte_always_inline uint64_t +otx2_lmt_submit_release(rte_iova_t io_address) +{ + uint64_t result; + + asm volatile ( + ".cpu generic+lse\n" + "ldeorl xzr,%x[rf],[%[rs]]" : + [rf] "=r"(result) : [rs] "r"(io_address)); + return result; +} + static __rte_always_inline void otx2_lmt_mov(void *out, const void *in, const uint32_t lmtext) { diff --git a/drivers/common/octeontx2/otx2_io_generic.h b/drivers/common/octeontx2/otx2_io_generic.h index b1d754008..da64c9b31 100644 --- a/drivers/common/octeontx2/otx2_io_generic.h +++ b/drivers/common/octeontx2/otx2_io_generic.h @@ -45,12 +45,22 @@ otx2_lmt_submit(uint64_t io_address) return 0; } +static inline int64_t +otx2_lmt_submit_release(uint64_t io_address) +{ + RTE_SET_USED(io_address); + + return 0; +} + static __rte_always_inline void otx2_lmt_mov(void *out, const void *in, const uint32_t lmtext) { - RTE_SET_USED(out); - RTE_SET_USED(in); - RTE_SET_USED(lmtext); + /* Copy four words if lmtext = 0 + * six words if lmtext = 1 + * eight words if lmtext =2 + */ + memcpy(out, in, (4 + (2 * lmtext)) * sizeof(uint64_t)); } static __rte_always_inline void diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h index 757fa6fe5..5eb83435e 100644 --- a/drivers/event/octeontx2/otx2_worker.h +++ b/drivers/event/octeontx2/otx2_worker.h @@ -280,7 +280,19 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], /* Perform header writes before barrier for TSO */ otx2_nix_xmit_prepare_tso(m, flags); - rte_io_wmb(); + /* Lets commit any changes in the packet here in case of single seg as + * no further changes to mbuf will be done. + * While for multi seg all mbufs used are set to NULL in + * otx2_nix_prepare_mseg() after preparing the sg list and these changes + * should be committed before LMTST. + * Also in no fast free case some mbuf fields are updated in + * otx2_nix_prefree_seg + * Hence otx2_nix_xmit_submit_lmt_release/otx2_nix_xmit_mseg_one_release + * has store barrier for multiseg. + */ + if (!(flags & NIX_TX_MULTI_SEG_F) && + !(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) + rte_io_wmb(); txq = otx2_ssogws_xtract_meta(m, txq_data); otx2_ssogws_prepare_pkt(txq, m, cmd, flags); @@ -291,12 +303,12 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], if (!ev->sched_type) { otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); otx2_ssogws_head_wait(ws); - if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) + if (otx2_nix_xmit_submit_lmt_release(txq->io_addr) == 0) otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, segdw); } else { - otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, - segdw); + otx2_nix_xmit_mseg_one_release(cmd, txq->lmt_addr, + txq->io_addr, segdw); } } else { /* Passing no of segdw as 4: HDR + EXT + SG + SMEM */ diff --git a/drivers/mempool/octeontx2/otx2_mempool_ops.c b/drivers/mempool/octeontx2/otx2_mempool_ops.c index 5229a7cfb..9ff71bcf6 100644 --- a/drivers/mempool/octeontx2/otx2_mempool_ops.c +++ b/drivers/mempool/octeontx2/otx2_mempool_ops.c @@ -15,6 +15,10 @@ otx2_npa_enq(struct rte_mempool *mp, void * const *obj_table, unsigned int n) const uint64_t addr = npa_lf_aura_handle_to_base(aura_handle) + NPA_LF_AURA_OP_FREE0; + /* Ensure mbuf init changes are written before the free pointers + * are enqueued to the stack. + */ + rte_io_wmb(); for (index = 0; index < n; index++) otx2_store_pair((uint64_t)obj_table[index], reg, addr); diff --git a/drivers/net/octeontx2/otx2_tx.c b/drivers/net/octeontx2/otx2_tx.c index 1b75cd559..4458d8bca 100644 --- a/drivers/net/octeontx2/otx2_tx.c +++ b/drivers/net/octeontx2/otx2_tx.c @@ -38,8 +38,11 @@ nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, otx2_nix_xmit_prepare_tso(tx_pkts[i], flags); } - /* Lets commit any changes in the packet */ - rte_io_wmb(); + /* Lets commit any changes in the packet here as no further changes + * to the packet will be done unless no fast free is enabled. + */ + if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) + rte_io_wmb(); for (i = 0; i < pkts; i++) { otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags); @@ -74,12 +77,11 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, otx2_nix_xmit_prepare_tso(tx_pkts[i], flags); } - /* Lets commit any changes in the packet */ - rte_io_wmb(); - for (i = 0; i < pkts; i++) { otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags); segdw = otx2_nix_prepare_mseg(tx_pkts[i], cmd, flags); + /* Lets commit any changes in the packet */ + rte_io_wmb(); otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0], tx_pkts[i]->ol_flags, segdw, flags); @@ -127,8 +129,11 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, /* Reduce the cached count */ txq->fc_cache_pkts -= pkts; - /* Lets commit any changes in the packet */ - rte_io_wmb(); + /* Lets commit any changes in the packet here as no further changes + * to the packet will be done unless no fast free is enabled. + */ + if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) + rte_io_wmb(); senddesc01_w0 = vld1q_dup_u64(&txq->cmd[0]); senddesc23_w0 = senddesc01_w0; @@ -221,6 +226,10 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, 1, 0); senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); + /* Ensuring mbuf fields which got updated in + * otx2_nix_prefree_seg are written before LMTST. + */ + rte_io_wmb(); } else { struct rte_mbuf *mbuf; /* Mark mempool object as "put" since diff --git a/drivers/net/octeontx2/otx2_tx.h b/drivers/net/octeontx2/otx2_tx.h index caf170fd1..d6ea3b487 100644 --- a/drivers/net/octeontx2/otx2_tx.h +++ b/drivers/net/octeontx2/otx2_tx.h @@ -363,6 +363,10 @@ otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) * DF bit = 0 otherwise */ send_hdr->w0.df = otx2_nix_prefree_seg(m); + /* Ensuring mbuf fields which got updated in + * otx2_nix_prefree_seg are written before LMTST. + */ + rte_io_wmb(); } /* Mark mempool object as "put" since it is freed by NIX */ if (!send_hdr->w0.df) @@ -395,6 +399,12 @@ otx2_nix_xmit_submit_lmt(const rte_iova_t io_addr) return otx2_lmt_submit(io_addr); } +static __rte_always_inline uint64_t +otx2_nix_xmit_submit_lmt_release(const rte_iova_t io_addr) +{ + return otx2_lmt_submit_release(io_addr); +} + static __rte_always_inline uint16_t otx2_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) { @@ -483,6 +493,19 @@ otx2_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, } while (lmt_status == 0); } +static __rte_always_inline void +otx2_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr, + rte_iova_t io_addr, uint16_t segdw) +{ + uint64_t lmt_status; + + rte_io_wmb(); + do { + otx2_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw); + lmt_status = otx2_lmt_submit(io_addr); + } while (lmt_status == 0); +} + #define L3L4CSUM_F NIX_TX_OFFLOAD_L3_L4_CSUM_F #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F #define VLAN_F NIX_TX_OFFLOAD_VLAN_QINQ_F -- 2.18.0 ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [dpdk-dev] [dpdk-stable] [PATCH v2 3/4] net/octeontx2: fix jumbo frame crash 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 3/4] net/octeontx2: fix jumbo frame crash Harman Kalra @ 2020-10-16 4:04 ` Thomas Monjalon 0 siblings, 0 replies; 14+ messages in thread From: Thomas Monjalon @ 2020-10-16 4:04 UTC (permalink / raw) To: Jerin Jacob, Harman Kalra Cc: Nithin Dabilpuram, Pavan Nikhilesh, Kiran Kumar K, dev, david.marchand 08/10/2020 20:48, Harman Kalra: > + /* Copy four words if lmtext = 0 > + * six words if lmtext = 1 > + * eight words if lmtext =2 > + */ > + memcpy(out, in, (4 + (2 * lmtext)) * sizeof(uint64_t)); It raises an error when compiling for PowerPC: drivers/common/octeontx2/otx2_io_generic.h:63:2: error: implicit declaration of function ‘memcpy’ I've fixed it by including string.h. ^ permalink raw reply [flat|nested] 14+ messages in thread
* [dpdk-dev] [PATCH v2 4/4] app/eventdev: enable fast free offload 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 1/4] event/octeontx2: add switch tag flush op Harman Kalra 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 2/4] event/octeontx2: improve single flow performance Harman Kalra 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 3/4] net/octeontx2: fix jumbo frame crash Harman Kalra @ 2020-10-08 18:48 ` Harman Kalra 2020-10-11 10:33 ` Jerin Jacob 2020-10-11 10:40 ` [dpdk-dev] [PATCH v2 1/4] event/octeontx2: add switch tag flush op Jerin Jacob 3 siblings, 1 reply; 14+ messages in thread From: Harman Kalra @ 2020-10-08 18:48 UTC (permalink / raw) To: Jerin Jacob; +Cc: dev, Harman Kalra Since we are not holding the mbufs or creating any references in the app, hence mbuf fast free offload can be enabled. Signed-off-by: Harman Kalra <hkalra@marvell.com> --- app/test-eventdev/test_pipeline_common.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/app/test-eventdev/test_pipeline_common.c b/app/test-eventdev/test_pipeline_common.c index 17088b1b4..f0c0ffea7 100644 --- a/app/test-eventdev/test_pipeline_common.c +++ b/app/test-eventdev/test_pipeline_common.c @@ -219,6 +219,11 @@ pipeline_ethdev_setup(struct evt_test *test, struct evt_options *opt) return ret; } + /* Enable mbuf fast free if PMD has the capability. */ + if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) + local_port_conf.txmode.offloads |= + DEV_TX_OFFLOAD_MBUF_FAST_FREE; + rx_conf = dev_info.default_rxconf; rx_conf.offloads = port_conf.rxmode.offloads; -- 2.18.0 ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [dpdk-dev] [PATCH v2 4/4] app/eventdev: enable fast free offload 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 4/4] app/eventdev: enable fast free offload Harman Kalra @ 2020-10-11 10:33 ` Jerin Jacob 2020-10-13 19:06 ` Jerin Jacob 0 siblings, 1 reply; 14+ messages in thread From: Jerin Jacob @ 2020-10-11 10:33 UTC (permalink / raw) To: Harman Kalra, Van Haaren, Harry, Mattias Rönnblom, Nipun Gupta, Pavan Nikhilesh, Erik Gabriel Carrillo, Rao, Nikhil, Hemant Agrawal, Gujjar, Abhinandan S Cc: Jerin Jacob, dpdk-dev On Fri, Oct 9, 2020 at 12:20 AM Harman Kalra <hkalra@marvell.com> wrote: > > Since we are not holding the mbufs or creating any references > in the app, hence mbuf fast free offload can be enabled. > > Signed-off-by: Harman Kalra <hkalra@marvell.com> > --- > app/test-eventdev/test_pipeline_common.c | 5 +++++ ++ eventdev maintainers Since testeventdev is not creating more than one pool and not using any mbuf reference feature. This change looks to me. Let me know if you have any different opinion if nay. Acked-by: Jerin Jacob <jerinj@marvell.com> > 1 file changed, 5 insertions(+) > > diff --git a/app/test-eventdev/test_pipeline_common.c b/app/test-eventdev/test_pipeline_common.c > index 17088b1b4..f0c0ffea7 100644 > --- a/app/test-eventdev/test_pipeline_common.c > +++ b/app/test-eventdev/test_pipeline_common.c > @@ -219,6 +219,11 @@ pipeline_ethdev_setup(struct evt_test *test, struct evt_options *opt) > return ret; > } > > + /* Enable mbuf fast free if PMD has the capability. */ > + if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) > + local_port_conf.txmode.offloads |= > + DEV_TX_OFFLOAD_MBUF_FAST_FREE; > + > rx_conf = dev_info.default_rxconf; > rx_conf.offloads = port_conf.rxmode.offloads; > > -- > 2.18.0 > ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [dpdk-dev] [PATCH v2 4/4] app/eventdev: enable fast free offload 2020-10-11 10:33 ` Jerin Jacob @ 2020-10-13 19:06 ` Jerin Jacob 0 siblings, 0 replies; 14+ messages in thread From: Jerin Jacob @ 2020-10-13 19:06 UTC (permalink / raw) To: Harman Kalra, Van Haaren, Harry, Mattias Rönnblom, Nipun Gupta, Pavan Nikhilesh, Erik Gabriel Carrillo, Rao, Nikhil, Hemant Agrawal, Gujjar, Abhinandan S Cc: Jerin Jacob, dpdk-dev On Sun, Oct 11, 2020 at 4:03 PM Jerin Jacob <jerinjacobk@gmail.com> wrote: > > On Fri, Oct 9, 2020 at 12:20 AM Harman Kalra <hkalra@marvell.com> wrote: > > > > Since we are not holding the mbufs or creating any references > > in the app, hence mbuf fast free offload can be enabled. > > > > Signed-off-by: Harman Kalra <hkalra@marvell.com> > > --- > > app/test-eventdev/test_pipeline_common.c | 5 +++++ > > ++ eventdev maintainers > > Since testeventdev is not creating more than one pool and not using > any mbuf reference feature. > This change looks to me. Let me know if you have any different opinion if nay. > > Acked-by: Jerin Jacob <jerinj@marvell.com> Applied to dpdk-next-eventdev/for-main. Thanks. > > > > 1 file changed, 5 insertions(+) > > > > diff --git a/app/test-eventdev/test_pipeline_common.c b/app/test-eventdev/test_pipeline_common.c > > index 17088b1b4..f0c0ffea7 100644 > > --- a/app/test-eventdev/test_pipeline_common.c > > +++ b/app/test-eventdev/test_pipeline_common.c > > @@ -219,6 +219,11 @@ pipeline_ethdev_setup(struct evt_test *test, struct evt_options *opt) > > return ret; > > } > > > > + /* Enable mbuf fast free if PMD has the capability. */ > > + if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) > > + local_port_conf.txmode.offloads |= > > + DEV_TX_OFFLOAD_MBUF_FAST_FREE; > > + > > rx_conf = dev_info.default_rxconf; > > rx_conf.offloads = port_conf.rxmode.offloads; > > > > -- > > 2.18.0 > > ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/4] event/octeontx2: add switch tag flush op 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 1/4] event/octeontx2: add switch tag flush op Harman Kalra ` (2 preceding siblings ...) 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 4/4] app/eventdev: enable fast free offload Harman Kalra @ 2020-10-11 10:40 ` Jerin Jacob 3 siblings, 0 replies; 14+ messages in thread From: Jerin Jacob @ 2020-10-11 10:40 UTC (permalink / raw) To: Harman Kalra; +Cc: Pavan Nikhilesh, Jerin Jacob, dpdk-dev On Fri, Oct 9, 2020 at 12:19 AM Harman Kalra <hkalra@marvell.com> wrote: > > From: Pavan Nikhilesh <pbhagavatula@marvell.com> > > Add SWTAG flush operation at the end of transmit sequence to > immediately release the tag held by the core. > Reuse Tag address to check SWTAG completion status. > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Applied to driver-specific patches(1 to 3)to dpdk-next-net-evntdev/for-main. Thanks. Waiting for feedback on the non-driver-specific 4/4 patch[1] [1] http://patches.dpdk.org/patch/80076/ ^ permalink raw reply [flat|nested] 14+ messages in thread
* [dpdk-dev] [PATCH 3/4] net/octeontx2: fix jumbo frame crash 2020-09-15 18:56 [dpdk-dev] [PATCH 1/4] event/octeontx2: add switch tag flush op Harman Kalra 2020-09-15 18:56 ` [dpdk-dev] [PATCH 2/4] event/octeontx2: improve single flow performance Harman Kalra @ 2020-09-15 18:56 ` Harman Kalra 2020-09-15 18:56 ` [dpdk-dev] [PATCH 4/4] app/eventdev: enable fast free offload Harman Kalra 2 siblings, 0 replies; 14+ messages in thread From: Harman Kalra @ 2020-09-15 18:56 UTC (permalink / raw) To: Jerin Jacob, Nithin Dabilpuram, Pavan Nikhilesh, Kiran Kumar K Cc: dev, Harman Kalra, stable Issue has been observed in case of multi segments where mbuf data gets corrupted due to missing barriers. Changes made to mbuf just before LMTST by one core gets updatded when the same mbuf is in use by another core, leading to corruption. It should be ensured that all changes made to mbuf should be written before LMTST. Fixes: cbd5710db48d ("net/octeontx2: add Tx multi segment version") Cc: stable@dpdk.org Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com> Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com> Signed-off-by: Harman Kalra <hkalra@marvell.com> --- drivers/common/octeontx2/otx2_io_arm64.h | 12 ++++++++++ drivers/common/octeontx2/otx2_io_generic.h | 16 +++++++++++--- drivers/event/octeontx2/otx2_worker.h | 20 +++++++++++++---- drivers/mempool/octeontx2/otx2_mempool_ops.c | 4 ++++ drivers/net/octeontx2/otx2_tx.c | 23 ++++++++++++++------ drivers/net/octeontx2/otx2_tx.h | 23 ++++++++++++++++++++ 6 files changed, 84 insertions(+), 14 deletions(-) diff --git a/drivers/common/octeontx2/otx2_io_arm64.h b/drivers/common/octeontx2/otx2_io_arm64.h index 7e45329b3..b5c85d9a6 100644 --- a/drivers/common/octeontx2/otx2_io_arm64.h +++ b/drivers/common/octeontx2/otx2_io_arm64.h @@ -63,6 +63,18 @@ otx2_lmt_submit(rte_iova_t io_address) return result; } +static __rte_always_inline uint64_t +otx2_lmt_submit_release(rte_iova_t io_address) +{ + uint64_t result; + + asm volatile ( + ".cpu generic+lse\n" + "ldeorl xzr,%x[rf],[%[rs]]" : + [rf] "=r"(result) : [rs] "r"(io_address)); + return result; +} + static __rte_always_inline void otx2_lmt_mov(void *out, const void *in, const uint32_t lmtext) { diff --git a/drivers/common/octeontx2/otx2_io_generic.h b/drivers/common/octeontx2/otx2_io_generic.h index b1d754008..da64c9b31 100644 --- a/drivers/common/octeontx2/otx2_io_generic.h +++ b/drivers/common/octeontx2/otx2_io_generic.h @@ -45,12 +45,22 @@ otx2_lmt_submit(uint64_t io_address) return 0; } +static inline int64_t +otx2_lmt_submit_release(uint64_t io_address) +{ + RTE_SET_USED(io_address); + + return 0; +} + static __rte_always_inline void otx2_lmt_mov(void *out, const void *in, const uint32_t lmtext) { - RTE_SET_USED(out); - RTE_SET_USED(in); - RTE_SET_USED(lmtext); + /* Copy four words if lmtext = 0 + * six words if lmtext = 1 + * eight words if lmtext =2 + */ + memcpy(out, in, (4 + (2 * lmtext)) * sizeof(uint64_t)); } static __rte_always_inline void diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h index 32d611458..41e80182d 100644 --- a/drivers/event/octeontx2/otx2_worker.h +++ b/drivers/event/octeontx2/otx2_worker.h @@ -280,7 +280,19 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], /* Perform header writes before barrier for TSO */ otx2_nix_xmit_prepare_tso(m, flags); - rte_cio_wmb(); + /* Lets commit any changes in the packet here in case of single seg as + * no further changes to mbuf will be done. + * While for multi seg all mbufs used are set to NULL in + * otx2_nix_prepare_mseg() after preparing the sg list and these changes + * should be committed before LMTST. + * Also in no fast free case some mbuf fields are updated in + * otx2_nix_prefree_seg + * Hence otx2_nix_xmit_submit_lmt_release/otx2_nix_xmit_mseg_one_release + * has store barrier for multiseg. + */ + if (!(flags & NIX_TX_MULTI_SEG_F) && + !(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) + rte_cio_wmb(); txq = otx2_ssogws_xtract_meta(m, txq_data); otx2_ssogws_prepare_pkt(txq, m, cmd, flags); @@ -291,12 +303,12 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], if (!ev->sched_type) { otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); otx2_ssogws_head_wait(ws); - if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) + if (otx2_nix_xmit_submit_lmt_release(txq->io_addr) == 0) otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, segdw); } else { - otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, - segdw); + otx2_nix_xmit_mseg_one_release(cmd, txq->lmt_addr, + txq->io_addr, segdw); } } else { /* Passing no of segdw as 4: HDR + EXT + SG + SMEM */ diff --git a/drivers/mempool/octeontx2/otx2_mempool_ops.c b/drivers/mempool/octeontx2/otx2_mempool_ops.c index 5229a7cfb..de67aa3bd 100644 --- a/drivers/mempool/octeontx2/otx2_mempool_ops.c +++ b/drivers/mempool/octeontx2/otx2_mempool_ops.c @@ -15,6 +15,10 @@ otx2_npa_enq(struct rte_mempool *mp, void * const *obj_table, unsigned int n) const uint64_t addr = npa_lf_aura_handle_to_base(aura_handle) + NPA_LF_AURA_OP_FREE0; + /* Ensure mbuf init changes are written before the free pointers + * are enqueued to the stack. + */ + rte_cio_wmb(); for (index = 0; index < n; index++) otx2_store_pair((uint64_t)obj_table[index], reg, addr); diff --git a/drivers/net/octeontx2/otx2_tx.c b/drivers/net/octeontx2/otx2_tx.c index 1af6fa649..ee5df48d3 100644 --- a/drivers/net/octeontx2/otx2_tx.c +++ b/drivers/net/octeontx2/otx2_tx.c @@ -38,8 +38,11 @@ nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, otx2_nix_xmit_prepare_tso(tx_pkts[i], flags); } - /* Lets commit any changes in the packet */ - rte_cio_wmb(); + /* Lets commit any changes in the packet here as no further changes + * to the packet will be done unless no fast free is enabled. + */ + if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) + rte_cio_wmb(); for (i = 0; i < pkts; i++) { otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags); @@ -74,12 +77,11 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, otx2_nix_xmit_prepare_tso(tx_pkts[i], flags); } - /* Lets commit any changes in the packet */ - rte_cio_wmb(); - for (i = 0; i < pkts; i++) { otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags); segdw = otx2_nix_prepare_mseg(tx_pkts[i], cmd, flags); + /* Lets commit any changes in the packet */ + rte_cio_wmb(); otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0], tx_pkts[i]->ol_flags, segdw, flags); @@ -127,8 +129,11 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, /* Reduce the cached count */ txq->fc_cache_pkts -= pkts; - /* Lets commit any changes in the packet */ - rte_cio_wmb(); + /* Lets commit any changes in the packet here as no further changes + * to the packet will be done unless no fast free is enabled. + */ + if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) + rte_cio_wmb(); senddesc01_w0 = vld1q_dup_u64(&txq->cmd[0]); senddesc23_w0 = senddesc01_w0; @@ -221,6 +226,10 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, 1, 0); senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); + /* Ensuring mbuf fields which got updated in + * otx2_nix_prefree_seg are written before LMTST. + */ + rte_cio_wmb(); } else { struct rte_mbuf *mbuf; /* Mark mempool object as "put" since diff --git a/drivers/net/octeontx2/otx2_tx.h b/drivers/net/octeontx2/otx2_tx.h index caf170fd1..5b78d2e2d 100644 --- a/drivers/net/octeontx2/otx2_tx.h +++ b/drivers/net/octeontx2/otx2_tx.h @@ -363,6 +363,10 @@ otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) * DF bit = 0 otherwise */ send_hdr->w0.df = otx2_nix_prefree_seg(m); + /* Ensuring mbuf fields which got updated in + * otx2_nix_prefree_seg are written before LMTST. + */ + rte_cio_wmb(); } /* Mark mempool object as "put" since it is freed by NIX */ if (!send_hdr->w0.df) @@ -395,6 +399,12 @@ otx2_nix_xmit_submit_lmt(const rte_iova_t io_addr) return otx2_lmt_submit(io_addr); } +static __rte_always_inline uint64_t +otx2_nix_xmit_submit_lmt_release(const rte_iova_t io_addr) +{ + return otx2_lmt_submit_release(io_addr); +} + static __rte_always_inline uint16_t otx2_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) { @@ -483,6 +493,19 @@ otx2_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, } while (lmt_status == 0); } +static __rte_always_inline void +otx2_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr, + rte_iova_t io_addr, uint16_t segdw) +{ + uint64_t lmt_status; + + rte_cio_wmb(); + do { + otx2_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw); + lmt_status = otx2_lmt_submit(io_addr); + } while (lmt_status == 0); +} + #define L3L4CSUM_F NIX_TX_OFFLOAD_L3_L4_CSUM_F #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F #define VLAN_F NIX_TX_OFFLOAD_VLAN_QINQ_F -- 2.18.0 ^ permalink raw reply [flat|nested] 14+ messages in thread
* [dpdk-dev] [PATCH 4/4] app/eventdev: enable fast free offload 2020-09-15 18:56 [dpdk-dev] [PATCH 1/4] event/octeontx2: add switch tag flush op Harman Kalra 2020-09-15 18:56 ` [dpdk-dev] [PATCH 2/4] event/octeontx2: improve single flow performance Harman Kalra 2020-09-15 18:56 ` [dpdk-dev] [PATCH 3/4] net/octeontx2: fix jumbo frame crash Harman Kalra @ 2020-09-15 18:56 ` Harman Kalra 2020-10-05 9:26 ` Jerin Jacob 2 siblings, 1 reply; 14+ messages in thread From: Harman Kalra @ 2020-09-15 18:56 UTC (permalink / raw) To: Jerin Jacob; +Cc: dev, Harman Kalra Since we are not holding the mbufs or creating any references in the app, hence mbuf fast free offload can be enabled. Signed-off-by: Harman Kalra <hkalra@marvell.com> --- app/test-eventdev/test_pipeline_common.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/app/test-eventdev/test_pipeline_common.c b/app/test-eventdev/test_pipeline_common.c index 17088b1b4..f0c0ffea7 100644 --- a/app/test-eventdev/test_pipeline_common.c +++ b/app/test-eventdev/test_pipeline_common.c @@ -219,6 +219,11 @@ pipeline_ethdev_setup(struct evt_test *test, struct evt_options *opt) return ret; } + /* Enable mbuf fast free if PMD has the capability. */ + if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) + local_port_conf.txmode.offloads |= + DEV_TX_OFFLOAD_MBUF_FAST_FREE; + rx_conf = dev_info.default_rxconf; rx_conf.offloads = port_conf.rxmode.offloads; -- 2.18.0 ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [dpdk-dev] [PATCH 4/4] app/eventdev: enable fast free offload 2020-09-15 18:56 ` [dpdk-dev] [PATCH 4/4] app/eventdev: enable fast free offload Harman Kalra @ 2020-10-05 9:26 ` Jerin Jacob 0 siblings, 0 replies; 14+ messages in thread From: Jerin Jacob @ 2020-10-05 9:26 UTC (permalink / raw) To: Harman Kalra; +Cc: Jerin Jacob, dpdk-dev On Wed, Sep 16, 2020 at 12:27 AM Harman Kalra <hkalra@marvell.com> wrote: > > Since we are not holding the mbufs or creating any references > in the app, hence mbuf fast free offload can be enabled. > > Signed-off-by: Harman Kalra <hkalra@marvell.com> Reviewed-by: Jerin Jacob <jerinj@marvell.com> > --- > app/test-eventdev/test_pipeline_common.c | 5 +++++ > 1 file changed, 5 insertions(+) > > diff --git a/app/test-eventdev/test_pipeline_common.c b/app/test-eventdev/test_pipeline_common.c > index 17088b1b4..f0c0ffea7 100644 > --- a/app/test-eventdev/test_pipeline_common.c > +++ b/app/test-eventdev/test_pipeline_common.c > @@ -219,6 +219,11 @@ pipeline_ethdev_setup(struct evt_test *test, struct evt_options *opt) > return ret; > } > > + /* Enable mbuf fast free if PMD has the capability. */ > + if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) > + local_port_conf.txmode.offloads |= > + DEV_TX_OFFLOAD_MBUF_FAST_FREE; > + > rx_conf = dev_info.default_rxconf; > rx_conf.offloads = port_conf.rxmode.offloads; > > -- > 2.18.0 > ^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2020-10-16 4:04 UTC | newest] Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-09-15 18:56 [dpdk-dev] [PATCH 1/4] event/octeontx2: add switch tag flush op Harman Kalra 2020-09-15 18:56 ` [dpdk-dev] [PATCH 2/4] event/octeontx2: improve single flow performance Harman Kalra 2020-10-05 9:29 ` Jerin Jacob 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 1/4] event/octeontx2: add switch tag flush op Harman Kalra 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 2/4] event/octeontx2: improve single flow performance Harman Kalra 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 3/4] net/octeontx2: fix jumbo frame crash Harman Kalra 2020-10-16 4:04 ` [dpdk-dev] [dpdk-stable] " Thomas Monjalon 2020-10-08 18:48 ` [dpdk-dev] [PATCH v2 4/4] app/eventdev: enable fast free offload Harman Kalra 2020-10-11 10:33 ` Jerin Jacob 2020-10-13 19:06 ` Jerin Jacob 2020-10-11 10:40 ` [dpdk-dev] [PATCH v2 1/4] event/octeontx2: add switch tag flush op Jerin Jacob 2020-09-15 18:56 ` [dpdk-dev] [PATCH 3/4] net/octeontx2: fix jumbo frame crash Harman Kalra 2020-09-15 18:56 ` [dpdk-dev] [PATCH 4/4] app/eventdev: enable fast free offload Harman Kalra 2020-10-05 9:26 ` Jerin Jacob
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).