DPDK patches and discussions
 help / color / mirror / Atom feed
From: <pbhagavatula@marvell.com>
To: <jerinj@marvell.com>, Nithin Dabilpuram <ndabilpuram@marvell.com>,
	"Kiran Kumar K" <kirankumark@marvell.com>,
	Sunil Kumar Kori <skori@marvell.com>,
	Satha Rao <skoteshwar@marvell.com>,
	Pavan Nikhilesh <pbhagavatula@marvell.com>,
	Shijith Thotton <sthotton@marvell.com>
Cc: <dev@dpdk.org>
Subject: [PATCH 2/4] event/cnxk: store and reuse workslot status
Date: Tue, 14 Dec 2021 02:44:22 +0530	[thread overview]
Message-ID: <20211213211425.6332-2-pbhagavatula@marvell.com> (raw)
In-Reply-To: <20211213211425.6332-1-pbhagavatula@marvell.com>

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Store the workslot status (TT, GRP and HEAD) and reuse it instead of
re-reading it from GWC, as each read from GWC imposes additional
latency.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/common/cnxk/roc_sso.h      | 11 ++++++-----
 drivers/event/cnxk/cn10k_worker.h  | 17 ++++++++++-------
 drivers/event/cnxk/cnxk_eventdev.h |  2 ++
 drivers/event/cnxk/cnxk_worker.h   | 11 +++++++----
 drivers/net/cnxk/cn10k_tx.h        | 12 ++++++------
 5 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 27d49c6c68..9c594f5c1c 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -54,12 +54,12 @@ struct roc_sso {
 	uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
 } __plt_cache_aligned;

-static __plt_always_inline void
+static __plt_always_inline uint64_t
 roc_sso_hws_head_wait(uintptr_t tag_op)
 {
-#ifdef RTE_ARCH_ARM64
 	uint64_t tag;

+#ifdef RTE_ARCH_ARM64
 	asm volatile(PLT_CPU_FEATURE_PREAMBLE
 		     "		ldr %[tag], [%[tag_op]]	\n"
 		     "		tbnz %[tag], 35, done%=		\n"
@@ -71,10 +71,11 @@ roc_sso_hws_head_wait(uintptr_t tag_op)
 		     : [tag] "=&r"(tag)
 		     : [tag_op] "r"(tag_op));
 #else
-	/* Wait for the SWTAG/SWTAG_FULL operation */
-	while (!(plt_read64(tag_op) & BIT_ULL(35)))
-		;
+	do {
+		tag = plt_read64(tag_op);
+	} while (!(tag & BIT_ULL(35)));
 #endif
+	return tag;
 }

 /* SSO device initialization */
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index e80e4fb895..65602a632e 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -40,8 +40,7 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
 {
 	const uint32_t tag = (uint32_t)ev->event;
 	const uint8_t new_tt = ev->sched_type;
-	const uint8_t cur_tt =
-		CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0));
+	const uint8_t cur_tt = CNXK_TT_FROM_TAG(ws->gw_rdata);

 	/* CNXK model
 	 * cur_tt/new_tt     SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
@@ -81,7 +80,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
 	const uint8_t grp = ev->queue_id;

 	/* Group hasn't changed, Use SWTAG to forward the event */
-	if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp)
+	if (CNXK_GRP_FROM_TAG(ws->gw_rdata) == grp)
 		cn10k_sso_hws_fwd_swtag(ws, ev);
 	else
 		/*
@@ -211,6 +210,7 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
 	} while (gw.u64[0] & BIT_ULL(63));
 	mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
 #endif
+	ws->gw_rdata = gw.u64[0];
 	gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
 		    (gw.u64[0] & (0x3FFull << 36)) << 4 |
 		    (gw.u64[0] & 0xffffffff);
@@ -406,7 +406,8 @@ NIX_RX_FASTPATH_MODES
 		RTE_SET_USED(timeout_ticks);                                   \
 		if (ws->swtag_req) {                                           \
 			ws->swtag_req = 0;                                     \
-			cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);  \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(                \
+				ws->base + SSOW_LF_GWS_WQE0);                  \
 			return 1;                                              \
 		}                                                              \
 		return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem);  \
@@ -426,7 +427,8 @@ NIX_RX_FASTPATH_MODES
                                                                                \
 		if (ws->swtag_req) {                                           \
 			ws->swtag_req = 0;                                     \
-			cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);  \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(                \
+				ws->base + SSOW_LF_GWS_WQE0);                  \
 			return ret;                                            \
 		}                                                              \
 		ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem);   \
@@ -509,8 +511,9 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
 	else
 		pa = txq->io_addr | ((segdw - 1) << 4);

-	if (!sched_type)
-		roc_sso_hws_head_wait(ws->base + SSOW_LF_GWS_TAG);
+	if (!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type)
+		ws->gw_rdata =
+			roc_sso_hws_head_wait(ws->base + SSOW_LF_GWS_TAG);

 	roc_lmt_submit_steorl(lmt_id, pa);
 }
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index b26df58588..ab58508590 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -47,6 +47,7 @@
 #define CNXK_CLR_SUB_EVENT(x)	    (~(0xffu << 20) & x)
 #define CNXK_GRP_FROM_TAG(x)	    (((x) >> 36) & 0x3ff)
 #define CNXK_SWTAG_PEND(x)	    (BIT_ULL(62) & x)
+#define CNXK_TAG_IS_HEAD(x)	    (BIT_ULL(35) & x)

 #define CN9K_SSOW_GET_BASE_ADDR(_GW) ((_GW)-SSOW_LF_GWS_OP_GET_WORK0)

@@ -123,6 +124,7 @@ struct cnxk_sso_evdev {

 struct cn10k_sso_hws {
 	uint64_t base;
+	uint64_t gw_rdata;
 	/* PTP timestamp */
 	struct cnxk_timesync_info *tstamp;
 	void *lookup_mem;
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 9f9ceab8a1..7de03f3fbb 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -52,11 +52,11 @@ cnxk_sso_hws_swtag_flush(uint64_t tag_op, uint64_t flush_op)
 	plt_write64(0, flush_op);
 }

-static __rte_always_inline void
+static __rte_always_inline uint64_t
 cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
 {
-#ifdef RTE_ARCH_ARM64
 	uint64_t swtp;
+#ifdef RTE_ARCH_ARM64

 	asm volatile(PLT_CPU_FEATURE_PREAMBLE
 		     "		ldr %[swtb], [%[swtp_loc]]	\n"
@@ -70,9 +70,12 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
 		     : [swtp_loc] "r"(tag_op));
 #else
 	/* Wait for the SWTAG/SWTAG_FULL operation */
-	while (plt_read64(tag_op) & BIT_ULL(62))
-		;
+	do {
+		swtp = plt_read64(tag_op);
+	} while (swtp & BIT_ULL(62));
 #endif
+
+	return swtp;
 }

 #endif
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index b3034c72cb..8b2f1c868e 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -905,8 +905,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
 			lnum++;
 	}

-	if (flags & NIX_TX_VWQE_F)
-		roc_sso_hws_head_wait(ws[0]);
+	if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
+		ws[1] = roc_sso_hws_head_wait(ws[0]);

 	left -= burst;
 	tx_pkts += burst;
@@ -1041,8 +1041,8 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
 		}
 	}

-	if (flags & NIX_TX_VWQE_F)
-		roc_sso_hws_head_wait(ws[0]);
+	if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
+		ws[1] = roc_sso_hws_head_wait(ws[0]);

 	left -= burst;
 	tx_pkts += burst;
@@ -2582,8 +2582,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 	if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
 		wd.data[0] >>= 16;

-	if (flags & NIX_TX_VWQE_F)
-		roc_sso_hws_head_wait(ws[0]);
+	if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
+		ws[1] = roc_sso_hws_head_wait(ws[0]);

 	left -= burst;

--
2.17.1


  reply	other threads:[~2021-12-13 21:15 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-13 21:14 [PATCH 1/4] net/cnxk: avoid command copy from Tx queue pbhagavatula
2021-12-13 21:14 ` pbhagavatula [this message]
2021-12-13 21:14 ` [PATCH 3/4] event/cnxk: disable default wait time for dequeue pbhagavatula
2021-12-13 21:14 ` [PATCH 4/4] net/cnxk: improve Rx performance pbhagavatula

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211213211425.6332-2-pbhagavatula@marvell.com \
    --to=pbhagavatula@marvell.com \
    --cc=dev@dpdk.org \
    --cc=jerinj@marvell.com \
    --cc=kirankumark@marvell.com \
    --cc=ndabilpuram@marvell.com \
    --cc=skori@marvell.com \
    --cc=skoteshwar@marvell.com \
    --cc=sthotton@marvell.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).