DPDK patches and discussions
 help / color / mirror / Atom feed
From: Jerin Jacob <jerinjacobk@gmail.com>
To: Pavan Nikhilesh <pbhagavatula@marvell.com>
Cc: Jerin Jacob <jerinj@marvell.com>,
	Nithin Dabilpuram <ndabilpuram@marvell.com>,
	 Kiran Kumar K <kirankumark@marvell.com>,
	Sunil Kumar Kori <skori@marvell.com>,
	 Satha Rao <skoteshwar@marvell.com>,
	Shijith Thotton <sthotton@marvell.com>, dpdk-dev <dev@dpdk.org>
Subject: Re: [PATCH v4 1/3] event/cnxk: store and reuse workslot status
Date: Mon, 14 Feb 2022 14:59:00 +0530	[thread overview]
Message-ID: <CALBAE1OfygF=gzcZHiNJzCJhKr+JnzYeVe_pN5G9MzNF1jPHOg@mail.gmail.com> (raw)
In-Reply-To: <20220210132047.2429-1-pbhagavatula@marvell.com>

On Thu, Feb 10, 2022 at 6:51 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Store and reuse the workslot TT, GRP and HEAD status
> instead of reading it from GWC, as reading from GWC imposes
> additional latency.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>

Series Acked-by: Jerin Jacob <jerinj@marvell.com>
Series applied to dpdk-next-net-eventdev/for-main. Thanks.


> ---
>  Depends-on: 21590
>
>  v4 Changes:
>  - Update commit title for 3/3
>
>  v3 Changes:
>  - Split and rebase patches.
>
>  v2 Changes:
>  - Rebase.
>  - Fix incorrect use of RoC API
>
>  drivers/common/cnxk/roc_sso.h      | 14 ++++++++------
>  drivers/event/cnxk/cn10k_worker.h  | 16 +++++++++-------
>  drivers/event/cnxk/cn9k_worker.h   |  6 +++---
>  drivers/event/cnxk/cnxk_eventdev.h |  2 ++
>  drivers/event/cnxk/cnxk_worker.h   | 11 +++++++----
>  drivers/net/cnxk/cn10k_tx.h        | 12 ++++++------
>  6 files changed, 35 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
> index 27d49c6c68..ab7cee1c60 100644
> --- a/drivers/common/cnxk/roc_sso.h
> +++ b/drivers/common/cnxk/roc_sso.h
> @@ -54,12 +54,13 @@ struct roc_sso {
>         uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
>  } __plt_cache_aligned;
>
> -static __plt_always_inline void
> -roc_sso_hws_head_wait(uintptr_t tag_op)
> +static __plt_always_inline uint64_t
> +roc_sso_hws_head_wait(uintptr_t base)
>  {
> -#ifdef RTE_ARCH_ARM64
> +       uintptr_t tag_op = base + SSOW_LF_GWS_TAG;
>         uint64_t tag;
>
> +#if defined(__aarch64__)
>         asm volatile(PLT_CPU_FEATURE_PREAMBLE
>                      "          ldr %[tag], [%[tag_op]] \n"
>                      "          tbnz %[tag], 35, done%=         \n"
> @@ -71,10 +72,11 @@ roc_sso_hws_head_wait(uintptr_t tag_op)
>                      : [tag] "=&r"(tag)
>                      : [tag_op] "r"(tag_op));
>  #else
> -       /* Wait for the SWTAG/SWTAG_FULL operation */
> -       while (!(plt_read64(tag_op) & BIT_ULL(35)))
> -               ;
> +       do {
> +               tag = plt_read64(tag_op);
> +       } while (!(tag & BIT_ULL(35)));
>  #endif
> +       return tag;
>  }
>
>  /* SSO device initialization */
> diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
> index ff08b2d974..ada230ea1d 100644
> --- a/drivers/event/cnxk/cn10k_worker.h
> +++ b/drivers/event/cnxk/cn10k_worker.h
> @@ -40,8 +40,7 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
>  {
>         const uint32_t tag = (uint32_t)ev->event;
>         const uint8_t new_tt = ev->sched_type;
> -       const uint8_t cur_tt =
> -               CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0));
> +       const uint8_t cur_tt = CNXK_TT_FROM_TAG(ws->gw_rdata);
>
>         /* CNXK model
>          * cur_tt/new_tt     SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
> @@ -81,7 +80,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
>         const uint8_t grp = ev->queue_id;
>
>         /* Group hasn't changed, Use SWTAG to forward the event */
> -       if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp)
> +       if (CNXK_GRP_FROM_TAG(ws->gw_rdata) == grp)
>                 cn10k_sso_hws_fwd_swtag(ws, ev);
>         else
>                 /*
> @@ -211,6 +210,7 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
>         } while (gw.u64[0] & BIT_ULL(63));
>         mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
>  #endif
> +       ws->gw_rdata = gw.u64[0];
>         gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
>                     (gw.u64[0] & (0x3FFull << 36)) << 4 |
>                     (gw.u64[0] & 0xffffffff);
> @@ -405,7 +405,8 @@ NIX_RX_FASTPATH_MODES
>                 RTE_SET_USED(timeout_ticks);                                   \
>                 if (ws->swtag_req) {                                           \
>                         ws->swtag_req = 0;                                     \
> -                       cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);  \
> +                       ws->gw_rdata = cnxk_sso_hws_swtag_wait(                \
> +                               ws->base + SSOW_LF_GWS_WQE0);                  \
>                         return 1;                                              \
>                 }                                                              \
>                 return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem);  \
> @@ -424,7 +425,8 @@ NIX_RX_FASTPATH_MODES
>                 uint64_t iter;                                                 \
>                 if (ws->swtag_req) {                                           \
>                         ws->swtag_req = 0;                                     \
> -                       cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);  \
> +                       ws->gw_rdata = cnxk_sso_hws_swtag_wait(                \
> +                               ws->base + SSOW_LF_GWS_WQE0);                  \
>                         return ret;                                            \
>                 }                                                              \
>                 ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem);   \
> @@ -507,8 +509,8 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
>         else
>                 pa = txq->io_addr | ((segdw - 1) << 4);
>
> -       if (!sched_type)
> -               roc_sso_hws_head_wait(ws->base + SSOW_LF_GWS_TAG);
> +       if (!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type)
> +               ws->gw_rdata = roc_sso_hws_head_wait(ws->base);
>
>         roc_lmt_submit_steorl(lmt_id, pa);
>  }
> diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
> index 303b04c215..8455272005 100644
> --- a/drivers/event/cnxk/cn9k_worker.h
> +++ b/drivers/event/cnxk/cn9k_worker.h
> @@ -700,7 +700,7 @@ cn9k_sso_hws_xmit_sec_one(const struct cn9k_eth_txq *txq, uint64_t base,
>
>         /* Head wait if needed */
>         if (base)
> -               roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> +               roc_sso_hws_head_wait(base);
>
>         /* ESN */
>         outb_priv = roc_nix_inl_onf_ipsec_outb_sa_sw_rsvd((void *)sa);
> @@ -793,7 +793,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
>                                              flags);
>                 if (!CNXK_TT_FROM_EVENT(ev->event)) {
>                         cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
> -                       roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> +                       roc_sso_hws_head_wait(base);
>                         cn9k_sso_txq_fc_wait(txq);
>                         if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
>                                 cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
> @@ -806,7 +806,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
>                 cn9k_nix_xmit_prepare_tstamp(txq, cmd, m->ol_flags, 4, flags);
>                 if (!CNXK_TT_FROM_EVENT(ev->event)) {
>                         cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
> -                       roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> +                       roc_sso_hws_head_wait(base);
>                         cn9k_sso_txq_fc_wait(txq);
>                         if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
>                                 cn9k_nix_xmit_one(cmd, txq->lmt_addr,
> diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
> index b26df58588..ab58508590 100644
> --- a/drivers/event/cnxk/cnxk_eventdev.h
> +++ b/drivers/event/cnxk/cnxk_eventdev.h
> @@ -47,6 +47,7 @@
>  #define CNXK_CLR_SUB_EVENT(x)      (~(0xffu << 20) & x)
>  #define CNXK_GRP_FROM_TAG(x)       (((x) >> 36) & 0x3ff)
>  #define CNXK_SWTAG_PEND(x)         (BIT_ULL(62) & x)
> +#define CNXK_TAG_IS_HEAD(x)        (BIT_ULL(35) & x)
>
>  #define CN9K_SSOW_GET_BASE_ADDR(_GW) ((_GW)-SSOW_LF_GWS_OP_GET_WORK0)
>
> @@ -123,6 +124,7 @@ struct cnxk_sso_evdev {
>
>  struct cn10k_sso_hws {
>         uint64_t base;
> +       uint64_t gw_rdata;
>         /* PTP timestamp */
>         struct cnxk_timesync_info *tstamp;
>         void *lookup_mem;
> diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
> index 9f9ceab8a1..7de03f3fbb 100644
> --- a/drivers/event/cnxk/cnxk_worker.h
> +++ b/drivers/event/cnxk/cnxk_worker.h
> @@ -52,11 +52,11 @@ cnxk_sso_hws_swtag_flush(uint64_t tag_op, uint64_t flush_op)
>         plt_write64(0, flush_op);
>  }
>
> -static __rte_always_inline void
> +static __rte_always_inline uint64_t
>  cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
>  {
> -#ifdef RTE_ARCH_ARM64
>         uint64_t swtp;
> +#ifdef RTE_ARCH_ARM64
>
>         asm volatile(PLT_CPU_FEATURE_PREAMBLE
>                      "          ldr %[swtb], [%[swtp_loc]]      \n"
> @@ -70,9 +70,12 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
>                      : [swtp_loc] "r"(tag_op));
>  #else
>         /* Wait for the SWTAG/SWTAG_FULL operation */
> -       while (plt_read64(tag_op) & BIT_ULL(62))
> -               ;
> +       do {
> +               swtp = plt_read64(tag_op);
> +       } while (swtp & BIT_ULL(62));
>  #endif
> +
> +       return swtp;
>  }
>
>  #endif
> diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
> index 4ae6bbf517..ec6366168c 100644
> --- a/drivers/net/cnxk/cn10k_tx.h
> +++ b/drivers/net/cnxk/cn10k_tx.h
> @@ -905,8 +905,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
>                         lnum++;
>         }
>
> -       if (flags & NIX_TX_VWQE_F)
> -               roc_sso_hws_head_wait(ws[0]);
> +       if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
> +               ws[1] = roc_sso_hws_head_wait(ws[0]);
>
>         left -= burst;
>         tx_pkts += burst;
> @@ -1041,8 +1041,8 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
>                 }
>         }
>
> -       if (flags & NIX_TX_VWQE_F)
> -               roc_sso_hws_head_wait(ws[0]);
> +       if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
> +               ws[1] = roc_sso_hws_head_wait(ws[0]);
>
>         left -= burst;
>         tx_pkts += burst;
> @@ -2582,8 +2582,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
>         if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
>                 wd.data[0] >>= 16;
>
> -       if (flags & NIX_TX_VWQE_F)
> -               roc_sso_hws_head_wait(ws[0]);
> +       if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
> +               ws[1] = roc_sso_hws_head_wait(ws[0]);
>
>         left -= burst;
>
> --
> 2.17.1
>

      parent reply	other threads:[~2022-02-14  9:29 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-19  7:13 [PATCH v2 1/4] net/cnxk: avoid command copy from Tx queue pbhagavatula
2022-01-19  7:13 ` [PATCH v2 2/4] event/cnxk: store and reuse workslot status pbhagavatula
2022-01-19  7:13 ` [PATCH v2 3/4] event/cnxk: disable default wait time for dequeue pbhagavatula
2022-01-19  7:13 ` [PATCH v2 4/4] net/cnxk: improve Rx performance pbhagavatula
2022-02-07 14:03 ` [PATCH v2 1/4] net/cnxk: avoid command copy from Tx queue Jerin Jacob
2022-02-10 10:13 ` [PATCH v3] " pbhagavatula
2022-02-10 10:19   ` Jerin Jacob
2022-02-10 13:15   ` [PATCH v4] " pbhagavatula
2022-02-11 10:27     ` Jerin Jacob
2022-02-10 10:19 ` [PATCH v3 1/3] event/cnxk: store and reuse workslot status pbhagavatula
2022-02-10 10:19   ` [PATCH v3 2/3] event/cnxk: disable default wait time for dequeue pbhagavatula
2022-02-10 10:19   ` [PATCH v3 3/3] net/cnxk: improve Rx performance pbhagavatula
2022-02-10 13:20   ` [PATCH v4 1/3] event/cnxk: store and reuse workslot status pbhagavatula
2022-02-10 13:20     ` [PATCH v4 2/3] event/cnxk: disable default wait time for dequeue pbhagavatula
2022-02-10 13:20     ` [PATCH v4 3/3] event/cnxk: improve Rx performance pbhagavatula
2022-02-14  9:29     ` Jerin Jacob [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CALBAE1OfygF=gzcZHiNJzCJhKr+JnzYeVe_pN5G9MzNF1jPHOg@mail.gmail.com' \
    --to=jerinjacobk@gmail.com \
    --cc=dev@dpdk.org \
    --cc=jerinj@marvell.com \
    --cc=kirankumark@marvell.com \
    --cc=ndabilpuram@marvell.com \
    --cc=pbhagavatula@marvell.com \
    --cc=skori@marvell.com \
    --cc=skoteshwar@marvell.com \
    --cc=sthotton@marvell.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).