From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <dev-bounces@dpdk.org>
Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124])
	by inbox.dpdk.org (Postfix) with ESMTP id 79A2145B4C;
	Tue, 22 Oct 2024 21:36:05 +0200 (CEST)
Received: from mails.dpdk.org (localhost [127.0.0.1])
	by mails.dpdk.org (Postfix) with ESMTP id E6E6A406BB;
	Tue, 22 Oct 2024 21:36:01 +0200 (CEST)
Received: from mx0a-0016f401.pphosted.com (mx0a-0016f401.pphosted.com
 [67.231.148.174])
 by mails.dpdk.org (Postfix) with ESMTP id A837C402E9
 for <dev@dpdk.org>; Tue, 22 Oct 2024 21:35:33 +0200 (CEST)
Received: from pps.filterd (m0431384.ppops.net [127.0.0.1])
 by mx0a-0016f401.pphosted.com (8.18.1.2/8.18.1.2) with ESMTP id 49MFU9qZ004130;
 Tue, 22 Oct 2024 12:35:32 -0700
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com; h=
 cc:content-transfer-encoding:content-type:date:from:in-reply-to
 :message-id:mime-version:references:subject:to; s=pfpt0220; bh=x
 NN2GlcTf1G0ZuSsCUBBCHsCyuIBpNKIHGq4qQZmUrM=; b=PrdQLn27lFCF2sLgd
 os/9KvRZaNLy8nbLDk9Vq4j0MAHxg+9LX69fjAm09JEracTMeMVAFJsBdoPX0Swg
 qoe9vpscwv//qAVdALiTgNNbV4PJvdt9ndOSRkHASBT2qUq8r8UZlpkFSEjTWTqK
 7qS4MbQ+6hcDG+NsKY+HpZdVChIh5yuOUXIucbHQ2r9Mv3R5oKHOosr0gpdPrpFe
 13hfrIhNUvQ33CJ5oTfFR79HCO3liI/YZ5yXj4NsRzO/CynR1N+O98t9cxYsBsoZ
 BhIyxtfbgck/UjaeXT7tM/zIw1LwJ3RNJE6ERknFas31T92avQL8QIupDpBa/Ksw
 kk7fg==
Received: from dc6wp-exch02.marvell.com ([4.21.29.225])
 by mx0a-0016f401.pphosted.com (PPS) with ESMTPS id 42eera0n0p-1
 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=NOT);
 Tue, 22 Oct 2024 12:35:32 -0700 (PDT)
Received: from DC6WP-EXCH02.marvell.com (10.76.176.209) by
 DC6WP-EXCH02.marvell.com (10.76.176.209) with Microsoft SMTP Server
 (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id
 15.2.1544.4; Tue, 22 Oct 2024 12:35:31 -0700
Received: from maili.marvell.com (10.69.176.80) by DC6WP-EXCH02.marvell.com
 (10.76.176.209) with Microsoft SMTP Server id 15.2.1544.4 via Frontend
 Transport; Tue, 22 Oct 2024 12:35:31 -0700
Received: from MININT-80QBFE8.corp.innovium.com (MININT-80QBFE8.marvell.com
 [10.28.164.106])
 by maili.marvell.com (Postfix) with ESMTP id 36CD43F705F;
 Tue, 22 Oct 2024 12:35:28 -0700 (PDT)
From: <pbhagavatula@marvell.com>
To: <jerinj@marvell.com>, <stephen@networkplumber.org>, Pavan Nikhilesh
 <pbhagavatula@marvell.com>, Shijith Thotton <sthotton@marvell.com>
CC: <dev@dpdk.org>
Subject: [PATCH v4 08/22] event/cnxk: add CN20K SSO dequeue fast path
Date: Wed, 23 Oct 2024 01:04:51 +0530
Message-ID: <20241022193505.23281-8-pbhagavatula@marvell.com>
X-Mailer: git-send-email 2.25.1
In-Reply-To: <20241022193505.23281-1-pbhagavatula@marvell.com>
References: <20241022084641.14497-1-pbhagavatula@marvell.com>
 <20241022193505.23281-1-pbhagavatula@marvell.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Type: text/plain
X-Proofpoint-ORIG-GUID: q20opuPxt9pDRWNb37YHBJ5KhbL_V23e
X-Proofpoint-GUID: q20opuPxt9pDRWNb37YHBJ5KhbL_V23e
X-Proofpoint-Virus-Version: vendor=baseguard
 engine=ICAP:2.0.293,Aquarius:18.0.687,Hydra:6.0.235,FMLib:17.0.607.475
 definitions=2020-10-13_15,2020-10-13_02,2020-04-07_01
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <https://mails.dpdk.org/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://mails.dpdk.org/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <https://mails.dpdk.org/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
Errors-To: dev-bounces@dpdk.org

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

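Add the CN20K SSO GWS event dequeue fast-path functions (with and without
a dequeue timeout) together with the inline get-work helpers they use, and
select between them at setup time based on the configured deq_tmo_ns.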

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn20k_eventdev.c |   5 +
 drivers/event/cnxk/cn20k_worker.c   |  54 +++++++++++
 drivers/event/cnxk/cn20k_worker.h   | 137 +++++++++++++++++++++++++++-
 3 files changed, 195 insertions(+), 1 deletion(-)

diff --git a/drivers/event/cnxk/cn20k_eventdev.c b/drivers/event/cnxk/cn20k_eventdev.c
index a5dd03de6e..d1668a00c1 100644
--- a/drivers/event/cnxk/cn20k_eventdev.c
+++ b/drivers/event/cnxk/cn20k_eventdev.c
@@ -114,11 +114,16 @@ static void
 cn20k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 #if defined(RTE_ARCH_ARM64)
+	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
 
 	event_dev->enqueue_burst = cn20k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn20k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn20k_sso_hws_enq_fwd_burst;
 
+	event_dev->dequeue_burst = cn20k_sso_hws_deq_burst;
+	if (dev->deq_tmo_ns)
+		event_dev->dequeue_burst = cn20k_sso_hws_tmo_deq_burst;
+
 #else
 	RTE_SET_USED(event_dev);
 #endif
diff --git a/drivers/event/cnxk/cn20k_worker.c b/drivers/event/cnxk/cn20k_worker.c
index c7de493681..2dcde0b444 100644
--- a/drivers/event/cnxk/cn20k_worker.c
+++ b/drivers/event/cnxk/cn20k_worker.c
@@ -382,3 +382,57 @@ cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb
 
 	return 1;
 }
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+
+	RTE_SET_USED(timeout_ticks); /* Non-timeout variant: exactly one get-work attempt. */
+
+	if (ws->swtag_req) { /* Flag set: no get-work is issued and ev is not written. */
+		ws->swtag_req = 0; /* Consume the flag so the next call takes the normal path. */
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return 1; /* NOTE(review): presumably caller's ev is still valid - confirm. */
+	}
+
+	return cn20k_sso_hws_get_work(ws, ev, 0); /* 1 if ev->u64 came back non-zero, else 0. */
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events); /* Burst size is ignored: only ev[0] can be filled. */
+
+	return cn20k_sso_hws_deq(port, ev, timeout_ticks); /* Result is 0 or 1. */
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
+{
+	struct cn20k_sso_hws *ws = port;
+	uint16_t ret = 1; /* Initial value doubles as the swtag_req early-exit result. */
+	uint64_t iter;
+
+	if (ws->swtag_req) { /* Flag set: no get-work is issued and ev is not written. */
+		ws->swtag_req = 0; /* Consume the flag so the next call takes the normal path. */
+		cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);
+		return ret;
+	}
+
+	ret = cn20k_sso_hws_get_work(ws, ev, 0); /* First attempt is unconditional. */
+	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
+		ret = cn20k_sso_hws_get_work(ws, ev, 0); /* max(1, timeout_ticks) tries total. */
+
+	return ret; /* 0 only when every attempt came back empty. */
+}
+
+uint16_t __rte_hot
+cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+			    uint64_t timeout_ticks)
+{
+	RTE_SET_USED(nb_events); /* Burst size is ignored: only ev[0] can be filled. */
+
+	return cn20k_sso_hws_tmo_deq(port, ev, timeout_ticks); /* Result is 0 or 1. */
+}
diff --git a/drivers/event/cnxk/cn20k_worker.h b/drivers/event/cnxk/cn20k_worker.h
index 5ff8f11b38..8dc60a06ec 100644
--- a/drivers/event/cnxk/cn20k_worker.h
+++ b/drivers/event/cnxk/cn20k_worker.h
@@ -7,8 +7,136 @@
 
 #include <rte_eventdev.h>
 
-#include "cnxk_worker.h"
 #include "cn20k_eventdev.h"
+#include "cnxk_worker.h"
+
+static __rte_always_inline void
+cn20k_sso_hws_post_process(struct cn20k_sso_hws *ws, uint64_t *u64, const uint32_t flags)
+{
+	RTE_SET_USED(ws); /* Currently unused. */
+	RTE_SET_USED(flags); /* Currently unused. */
+
+	u64[0] = (u64[0] & (0x3ull << 32)) << 6 | (u64[0] & (0x3FFull << 36)) << 4 |
+		 (u64[0] & 0xffffffff); /* bits 33:32 -> 39:38, 45:36 -> 49:40, 31:0 kept. */
+}
+
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw; /* One 128-bit value that is also accessed as two 64-bit words. */
+
+	gw.get_work = ws->gw_wdata; /* Seed with the workslot's stored request data. */
+#if defined(RTE_ARCH_ARM64)
+#if defined(__clang__)
+	register uint64_t x0 __asm("x0") = (uint64_t)gw.u64[0]; /* Bound to register x0. */
+	register uint64_t x1 __asm("x1") = (uint64_t)gw.u64[1]; /* Bound to register x1. */
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0); /* Issue the request. */
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE /* Re-read WQE0 under WFE until pend_gw clears. */
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbz %[x0], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[x0], %[x1], [%[tag_loc]]	\n"
+		     "		tbnz %[x0], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else /* !RTE_ARM_USE_WFE: one CASPAL on GET_WORK0, result lands back in x0/x1 */
+	asm volatile(".arch armv8-a+lse\n"
+		     "caspal %[x0], %[x1], %[x0], %[x1], [%[dst]]\n"
+		     : [x0] "+r" (x0), [x1] "+r" (x1)
+		     : [dst] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif /* RTE_ARM_USE_WFE */
+	gw.u64[0] = x0; /* Copy the register results back into the union. */
+	gw.u64[1] = x1;
+#else /* !__clang__: same two strategies, operating on the 128-bit operand directly */
+#if defined(RTE_ARM_USE_WFE)
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0); /* Issue the request. */
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE /* Re-read WQE0 under WFE until pend_gw clears. */
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbz %[wdata], %[pend_gw], done%=	\n"
+		     "		sevl					\n"
+		     "rty%=:	wfe					\n"
+		     "		ldp %[wdata], %H[wdata], [%[tag_loc]]	\n"
+		     "		tbnz %[wdata], %[pend_gw], rty%=	\n"
+		     "done%=:						\n"
+		     "		dmb ld					\n"
+		     : [wdata] "=&r"(gw.get_work)
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0),
+		       [pend_gw] "i"(SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
+		     : "memory");
+#else /* !RTE_ARM_USE_WFE: one CASPAL on GET_WORK0, result lands back in gw.get_work */
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "caspal %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
+		     : [wdata] "+r"(gw.get_work)
+		     : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
+		     : "memory");
+#endif /* RTE_ARM_USE_WFE */
+#endif /* __clang__ */
+#else /* !RTE_ARCH_ARM64: portable write-then-poll fallback */
+	plt_write64(gw.u64[0], ws->base + SSOW_LF_GWS_OP_GET_WORK0); /* Issue the request. */
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63)); /* Poll until bit 63 of the first word clears. */
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
+#endif /* RTE_ARCH_ARM64 */
+	ws->gw_rdata = gw.u64[0]; /* Keep the raw first word before it is reformatted. */
+	if (gw.u64[1]) /* Reformat only when the second word is non-zero. */
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+
+	ev->event = gw.u64[0]; /* ev is written on every call, including the empty case. */
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1]; /* 1 if the second word is non-zero, else 0. */
+}
+
+/* Used in cleaning up workslot: polls WQE0 only, no write to GET_WORK0 is made here. */
+static __rte_always_inline uint16_t
+cn20k_sso_hws_get_work_empty(struct cn20k_sso_hws *ws, struct rte_event *ev, const uint32_t flags)
+{
+	union {
+		__uint128_t get_work;
+		uint64_t u64[2];
+	} gw; /* Only the u64[] view is used in this function. */
+
+#ifdef RTE_ARCH_ARM64
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE /* Re-read WQE0 under WFE until bit 63 clears. */
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbz %[tag], 63, .Ldone%=		\n"
+		     "		sevl					\n"
+		     ".Lrty%=:	wfe					\n"
+		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
+		     "		tbnz %[tag], 63, .Lrty%=		\n"
+		     ".Ldone%=:	dmb ld					\n"
+		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+		     : [tag_loc] "r"(ws->base + SSOW_LF_GWS_WQE0)
+		     : "memory");
+#else /* !RTE_ARCH_ARM64: plain polling loop on the same bit */
+	do {
+		roc_load_pair(gw.u64[0], gw.u64[1], ws->base + SSOW_LF_GWS_WQE0);
+	} while (gw.u64[0] & BIT_ULL(63));
+#endif /* RTE_ARCH_ARM64 */
+
+	ws->gw_rdata = gw.u64[0]; /* Keep the raw first word before it is reformatted. */
+	if (gw.u64[1])
+		cn20k_sso_hws_post_process(ws, gw.u64, flags);
+	else /* Same bit moves as cn20k_sso_hws_post_process(), applied inline when empty. */
+		gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
+			    (gw.u64[0] & (0x3FFull << 36)) << 4 | (gw.u64[0] & 0xffffffff);
+
+	ev->event = gw.u64[0]; /* ev is written on every call, including the empty case. */
+	ev->u64 = gw.u64[1];
+
+	return !!gw.u64[1]; /* 1 if the second word is non-zero, else 0. */
+}
 
 /* CN20K Fastpath functions. */
 uint16_t __rte_hot cn20k_sso_hws_enq_burst(void *port, const struct rte_event ev[],
@@ -18,4 +146,11 @@ uint16_t __rte_hot cn20k_sso_hws_enq_new_burst(void *port, const struct rte_even
 uint16_t __rte_hot cn20k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 					       uint16_t nb_events);
 
+uint16_t __rte_hot cn20k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
+					   uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks);
+uint16_t __rte_hot cn20k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
+					       uint16_t nb_events, uint64_t timeout_ticks);
+
 #endif
-- 
2.25.1